diff --git a/models/abi/G_100000.pth b/models/abi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b9afdd02b511950b4ae520ce6dca6666ac8152a --- /dev/null +++ b/models/abi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d49b59265a0a5f5d899dde745c21bc0a492f68f3769521d238b23a446f93b2 +size 145496039 diff --git a/models/abi/config.json b/models/abi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/abi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/abi/vocab.txt b/models/abi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4a994d1d83ffb58a9050ed3b2944571dacdeb19d --- /dev/null +++ b/models/abi/vocab.txt @@ -0,0 +1,53 @@ +ɔ +ê +ǒ + +̂ +h +ě +i +ɩ +k +á +̌ +ǐ +b +p +í +ǔ +u +ń +w +' +ί +f +ó +y +s +î +m +ɛ +έ +e +ʋ +ḿ +n +ú +o +d +â +ô +c +ǎ +é +́ +j +l +- +t +_ +r +g +ε +û +a diff --git a/models/abp/G_100000.pth b/models/abp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f02554efb6bb4ba4247e9a1e0fc15bf1f029fb8 --- /dev/null +++ b/models/abp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a87c726778281f406b80e0383b53e80f0a39fd75f3dba8c50ab17df25b97c76 +size 145480667 diff --git a/models/abp/config.json b/models/abp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/abp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/abp/vocab.txt b/models/abp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6212eeac9b50a1f9c557d0c58b81f310fb37105c --- /dev/null +++ b/models/abp/vocab.txt @@ -0,0 +1,33 @@ +_ +t +e +b +ō +j +c +r +f +w +i +q +h +g +l +m +k +y +d +ā +s +' +a +n +x +6 +o +- +p +u + +v +z diff --git a/models/aca/G_100000.pth b/models/aca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d42b2156341a7760b04d8759b2c5a368535e28b --- /dev/null +++ b/models/aca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6e02c38c06f326322663ec0e415aaaee53427226be1df1b3fa0ba91ebd6134 +size 145482239 diff --git a/models/aca/config.json b/models/aca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aca/vocab.txt b/models/aca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4084ed70136d3cc1b19011be47c72f83cf27cbc7 --- /dev/null +++ b/models/aca/vocab.txt @@ -0,0 +1,35 @@ +a +| +i +n +á +c +e +u +l +r +w +j +s +í +m +é +o +' +h +t +y +b +d +ú +q +ó +p +— +g +f +z +v +x +ñ + diff --git a/models/acd/G_100000.pth b/models/acd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ef34a3fd366568741ba4ac3490d7753f3fbed36 --- /dev/null +++ b/models/acd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ffad2d099b4fbfe8b434922abb825e898b105276f5e18bdc794513400f9203 +size 145476854 diff --git a/models/acd/config.json b/models/acd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/acd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/acd/vocab.txt b/models/acd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5c7d657c6db735544c06fd99177080daddff11e3 --- /dev/null +++ b/models/acd/vocab.txt @@ -0,0 +1,28 @@ +| +a +ɛ +n +ɔ +i +o +m +y +e +u +g +s +k +b +r +l +d +w +f +- +t +p +' +ŋ +h +c + diff --git a/models/ace/G_100000.pth b/models/ace/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..da389e179ee3459c10adf1d99e31918819762825 --- /dev/null +++ b/models/ace/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b520bb31ef5f6fe27b350d1cdfafc5e0e7241fc2235c0e0ffd0d2232184892 +size 145487623 diff --git a/models/ace/config.json b/models/ace/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ace/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ace/vocab.txt b/models/ace/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5370d0f0f0f212614f1bec2acd6aa7d39b4d6112 --- /dev/null +++ b/models/ace/vocab.txt @@ -0,0 +1,42 @@ +| +a +n +e +u +g +t +h +i +k +m +b +o +y +l +r +s +p +j +d +é +w +ô +ë +- +c +ö +á +ó +f +z +' +q +ú +` +0 +6 +4 +3 +1 +2 + diff --git a/models/acf/G_100000.pth b/models/acf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5db621d700623c9e3841f74c5adbdf7933ba5ad1 --- /dev/null +++ b/models/acf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b77a841ab67ccc15b895c9fb24779b94d85b83e5267992dba49a66bcd6923f93 +size 145480679 diff --git a/models/acf/config.json b/models/acf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/acf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/acf/vocab.txt b/models/acf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbd6591007a7d7d32b96903e4d1e2e7071d4a770 --- /dev/null +++ b/models/acf/vocab.txt @@ -0,0 +1,33 @@ +| +a +n +é +i +o +s +t +k +y +p +l +w +m +è +u +d +- +e +b +v +j +ò +z +f +ʼ +h +g +c +r +— +' + diff --git a/models/ach/G_100000.pth b/models/ach/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9704b72bc4312321dacf28bed4a79fbf173b05a7 --- /dev/null +++ b/models/ach/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7cffe85baf83584a9876299052071c35e4604a5192b9ecd6402a88d70a6d7a1 +size 145476861 diff --git a/models/ach/config.json b/models/ach/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ach/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ach/vocab.txt b/models/ach/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..14f296445302e01b5d6d683ee1206b8c0d7054a9 --- /dev/null +++ b/models/ach/vocab.txt @@ -0,0 +1,28 @@ +| +o +a +i +e +k +n +m +w +t +u +y +l +c +d +b +g +r +p +ŋ +j +- +s +' +v +f +h + diff --git a/models/acn/G_100000.pth b/models/acn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..130c38afce071719d1f2513eabdd4b50f5ac8888 --- /dev/null +++ b/models/acn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:650c7c63e516d2c2c363fdc4f9ce8ac58f6b8c37c75fe1a1e4bbe1d633e8e8f0 +size 145483865 diff --git a/models/acn/config.json b/models/acn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/acn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/acn/vocab.txt b/models/acn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..249bc5cef2d99e3605690cb42aa8aae3254d9940 --- /dev/null +++ b/models/acn/vocab.txt @@ -0,0 +1,37 @@ +u +k +g +a +_ +— +d +w +i +o +- +b +e +n +t +y +p +s +z +x +m +h +c + +l +0 +2 +j +f +3 +5 +q +v +r +6 +1 +4 diff --git a/models/acr/G_100000.pth b/models/acr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a67daf6420cc2de9e2de60fae2a292ffbb2bbeda --- /dev/null +++ b/models/acr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe163f49adbb77ba29f6a49a80ed6417f00de3ed26fc7bde862ef44fbb7bb706 +size 145483857 diff --git a/models/acr/config.json b/models/acr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/acr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/acr/vocab.txt b/models/acr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77e4d297fd3ec3aba11fb74c4ecb53620ae68fed --- /dev/null +++ b/models/acr/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +' +k +r +u +e +j +n +o +c +l +h +t +q +w +x +m +b +s +y +p +z +d +— +ú +g +á +é +ó +f +í +v +- +ñ + diff --git a/models/acu/G_100000.pth b/models/acu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7997481e250efe914c36ca4bb3b013118aa96001 --- /dev/null +++ b/models/acu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73f60dc370d4b9603a8b4f49a39f38b9ba104cc4873804e37a3742f669656bf +size 145482199 diff --git a/models/acu/config.json b/models/acu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/acu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/acu/vocab.txt b/models/acu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8a5265c61e4973a3fe8660e120f05420e52f25e7 --- /dev/null +++ b/models/acu/vocab.txt @@ -0,0 +1,35 @@ +a +| +i +n +u +t +r +m +k +s +e +h +j +c +w +y +p +g +o +í +— +ú +d +l +é +á +b +f +v +ó +z +q +x +ñ + diff --git a/models/ade/G_100000.pth b/models/ade/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..00325602dff093eee81a2c1ac0f5064036bd0b49 --- /dev/null +++ b/models/ade/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f15657ac66a20bbf85a3ff8968ac2f5f1306fe124dbf7557c9ebb35f63db43d7 +size 145486083 diff --git a/models/ade/config.json b/models/ade/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ade/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ade/vocab.txt b/models/ade/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c2dd6bded46e62603c493f1763ffd87e64925848 --- /dev/null +++ b/models/ade/vocab.txt @@ -0,0 +1,40 @@ +| +a +â +n +e +b +g +æ +i +t +w +ô +y +k +o +r +l +u +d +m +f +s +û +p +à +- +è +ã +õ +ù +î +å +ì +ü +ǹ +ò +h +' +c + diff --git a/models/adh/G_100000.pth b/models/adh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8eec0cf3182e4b4c1bbc7ae54b1f32a5bff607d9 --- /dev/null +++ b/models/adh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04406c957a29e5e62b97e3b528868d11cafbec74886e242d56f8b3501a40bebf +size 145477721 diff --git a/models/adh/config.json b/models/adh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/adh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/adh/vocab.txt b/models/adh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e09bdb8ae2b23055bbbc6091c3c079d3de54006c --- /dev/null +++ b/models/adh/vocab.txt @@ -0,0 +1,29 @@ +| +o +i +a +e +n +k +m +w +r +y +h +t +d +j +g +u +l +p +c +b +ŋ +s +f +' +z +v +- + diff --git a/models/adj/G_100000.pth b/models/adj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..be2b26c9ed543b5a702a999d9599ae6836f6dbfb --- /dev/null +++ b/models/adj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256854868625effe7ef0801e3467e22e74986dc465dc3af39f5ee138ffa009a0 +size 145490681 diff --git a/models/adj/config.json b/models/adj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/adj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/adj/vocab.txt b/models/adj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..734675457589febdbbf74363fa21218a2c9367b6 --- /dev/null +++ b/models/adj/vocab.txt @@ -0,0 +1,46 @@ +| +a +e +ɛ +n +m +l +k +i +ŋ +' +s +b +o +y +w +ɔ +u +r +g +c +t +d +j +f +p +- +ó +́ +í +̀ +á +h +é +ì +ò +ú +à +ê +v +ù +̂ +è +z +ô + diff --git a/models/adx/G_100000.pth b/models/adx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d5fe5a630b8d705ee32a85537a6999232f99c19 --- /dev/null +++ b/models/adx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62192b382120334b39670d3ae5567159358eb66361588f1b42e6cce52b825e83 +size 145500761 diff --git a/models/adx/config.json b/models/adx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/adx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/adx/vocab.txt b/models/adx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7aabb89c5b12f45477e7bdc34fe8cfe15e9a82b9 --- /dev/null +++ b/models/adx/vocab.txt @@ -0,0 +1,59 @@ +ཤ +ྐ +ི +འ +ཞ + +ྩ +པ +ྫ +ྱ +ྭ +ཙ +ད +ཁ +ྔ +ྷ +ུ +ེ +ོ +_ +ྡ +ཆ +མ +ྟ +ྤ +ན +ཉ +ཛ +ཊ +ཏ +ྦ +ྨ +ང +ཪ +ཟ +ས +ཚ +ྣ +ླ +ཇ +ྗ +ཨ +ྙ +ྒ +བ +ཅ +ཧ +ྲ +ཝ +ཡ +ཱ +ག +ཀ +ར +ཐ +ཕ +ྕ +ལ +་ diff --git a/models/aeu/G_100000.pth b/models/aeu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d463fff3801f3e91a254ca654817f4685c05bf60 --- /dev/null +++ b/models/aeu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2d9b805eb84438ab960cb51ee261fdaf62280f1b1c0ec27449692ecc857ff8f +size 145474559 diff --git a/models/aeu/config.json b/models/aeu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aeu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aeu/vocab.txt b/models/aeu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c54a7f4738f10defc9b2bfa2d2d5361d2f1181ea --- /dev/null +++ b/models/aeu/vocab.txt @@ -0,0 +1,25 @@ +b +j +g +t +u +- +k +q +s +i +e +l +o +p +_ +y +n +c +v + +m +h +d +a +w diff --git a/models/agd/G_100000.pth b/models/agd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..682fc48682aa625c37691781b05a45dbfbb33404 --- /dev/null +++ b/models/agd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3724c7877b954ab715ef920ca771336f4e3cf1e7596db81cbd11893f035588c +size 145478407 diff --git a/models/agd/config.json b/models/agd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agd/vocab.txt b/models/agd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b96e7aae09da8a03795308a1cd2188e117e7981 --- /dev/null +++ b/models/agd/vocab.txt @@ -0,0 +1,30 @@ +3 +p +y +f +ó +4 +0 +á +m +- +r +h + +t +d +o +é +' +s +b +e +n +u +_ +v +ú +í +a +i +k diff --git a/models/agg/G_100000.pth b/models/agg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..577fe7e006b3797b7703c4ec85c302654c45c9d9 --- /dev/null +++ b/models/agg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8aa1936c08d0178a46cc741a42e231732c4955096833c58371b5fd4ceefe678 +size 145480667 diff --git a/models/agg/config.json b/models/agg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agg/vocab.txt b/models/agg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..facf0a9e4869c6abd24fbab16bde1637e8dd0432 --- /dev/null +++ b/models/agg/vocab.txt @@ -0,0 +1,33 @@ +a +| +ɨ +o +n +m +h +r +u +d +i +b +e +s +ü +y +f +g +ŋ +k +w +p +t +- +0 +1 +7 +2 +4 +5 +6 +3 + diff --git a/models/agn/G_100000.pth b/models/agn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..09253fc6eb1a8d6dc145c88fadff1f271be68f06 --- /dev/null +++ b/models/agn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c47cccb5887edae65a10706f16dfe6e019f866e9753dba83302c1f68ea5ad48 +size 145486083 diff --git a/models/agn/config.json b/models/agn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agn/vocab.txt b/models/agn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..29c88df40b9a8c0a3aba63215a6a5777f1832f77 --- /dev/null +++ b/models/agn/vocab.txt @@ -0,0 +1,40 @@ +a +| +n +g +i +o +t +m +d +s +l +e +p +r +k +b +y +w +- +u +j +c +h +f +z +v +q +' +ā +ō +0 +x +1 +— +6 +ē +5 +2 +7 + diff --git a/models/agr/G_100000.pth b/models/agr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ecb78190ec817a474a98eebf2bc5227f1eaa4219 --- /dev/null +++ b/models/agr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5242aa22fee364f32c16ce75f3ea33cb71aef9cf9a7b705d70b8492e7e35c378 +size 145484531 diff --git a/models/agr/config.json b/models/agr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agr/vocab.txt b/models/agr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..753b549be4b59b79f33227dc7439772b7584fa70 --- /dev/null +++ b/models/agr/vocab.txt @@ -0,0 +1,38 @@ +a +| +i +u +n +t +k +m +j +s +e +g +h +w +d +p +c +y +í +b +r +o +á +ú +l +— +é +f +' +v +z +ó +q +x +ñ +2 +7 + diff --git a/models/agu/G_100000.pth b/models/agu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..426fad894c06c84b3b2c8e6b7f3f45795a2b226d --- /dev/null +++ b/models/agu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd2ab31078a282d004930ec2736403c8da29669e81a52afa5845633dfda670c1 +size 145485919 diff --git a/models/agu/config.json b/models/agu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agu/vocab.txt b/models/agu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c49027bc328668cc91d2dd5c6d7a5dc1a271e664 --- /dev/null +++ b/models/agu/vocab.txt @@ -0,0 +1,40 @@ +j +r +ó +b +é +s +t +v +g +i +x +k +f + +d +— +6 +y +z +c +h +u +m +p +a +ẍ +- +w +o +n +e +' +ꞌ +ñ +ú +í +á +_ +q +l diff --git a/models/agx/G_100000.pth b/models/agx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b593ca56fc326e110c820b2cb92d6db8d9e94e83 --- /dev/null +++ b/models/agx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf3a1cf43e91c178ab9ff73e26e927f283c4672c8c09c3c1da2ba194b99faaad +size 145481467 diff --git a/models/agx/config.json b/models/agx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/agx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/agx/vocab.txt b/models/agx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9d3f1c172babfb0e8abf8ef91f48c1b6b135f073 --- /dev/null +++ b/models/agx/vocab.txt @@ -0,0 +1,34 @@ +а +| +и +у +н +е +р +с +г +ь +к +д +л +х +ӏ +т +в +ъ +й +ч +я +м +б +п +ф +з +ш +э +ж +– +ц +ю +о + diff --git a/models/aha/G_100000.pth b/models/aha/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..99734c9b060437151735fce61363644b434c8c41 --- /dev/null +++ b/models/aha/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d49c277cfd6eb7f243ff0199cdc6168976758d50e51ac8340374a114377e00 +size 145491456 diff --git a/models/aha/config.json b/models/aha/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aha/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aha/vocab.txt b/models/aha/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dc236e9316bd8128d0f3a9b605c1dc0e71c2923e --- /dev/null +++ b/models/aha/vocab.txt @@ -0,0 +1,47 @@ +| +ɩ +n +a +ɛ +m +i +l +ɔ +y +b +ʋ +e +w +k +u +d +s +z +o +t +h +̃ +g +v +ɣ +p +f +á +ĩ +ã +́ +í +é +ũ +0 +ó +' +ú +1 +2 +5 +4 +6 +3 +7 + diff --git a/models/ahk/G_100000.pth b/models/ahk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..724e769d3f8475355720eb17340b544bcb2b3cbb --- /dev/null +++ b/models/ahk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05c503fc11b670f91bee458eef22af0d6ff6a7e375d8dae9efe78b42c43da71a +size 145478399 diff --git a/models/ahk/config.json b/models/ahk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ahk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ahk/vocab.txt b/models/ahk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cf8112899696e4294a029010d68899b309132025 --- /dev/null +++ b/models/ahk/vocab.txt @@ -0,0 +1,30 @@ +| +a +ˬ +e +h +u +- +w +m +i +̭ +n +y +' +l +g +d +s +k +t +o +ˆ +j +b +p +z +c +` +f + diff --git a/models/aia/G_100000.pth b/models/aia/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d02d39944fdccba1fff7a87ab2650088a47bfbd --- /dev/null +++ b/models/aia/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc46fb2e925c19e67cf102d118cceae463568bee1670755b1e03e31c442f840 +size 145477597 diff --git a/models/aia/config.json b/models/aia/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aia/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aia/vocab.txt b/models/aia/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..112d50bb3e5421c07bd690138eaf447cd251f603 --- /dev/null +++ b/models/aia/vocab.txt @@ -0,0 +1,29 @@ +d +j +a +t +c +y + +m +g +v +6 +_ +b +u +e +w +- +h +p +ꞌ +r +f +s +l +n +o +— +i +k diff --git a/models/aka/G_100000.pth b/models/aka/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5854dd5faa29bed831d9c0d2ee9344b825550877 --- /dev/null +++ b/models/aka/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd62d03102a5ddb7a6c86f655882969f1307271424cc62861dac19a1134a88b +size 145478399 diff --git a/models/aka/config.json b/models/aka/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aka/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aka/vocab.txt b/models/aka/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1acf3892d40041550f0dfb085f24afcde0768cbb --- /dev/null +++ b/models/aka/vocab.txt @@ -0,0 +1,30 @@ +a +ʼ +t +- +n +' +_ +3 + +p +m +á +w +y +ɛ +f +o +g +u +k +h +l +s +2 +e +r +i +ɔ +d +b diff --git a/models/akb/G_100000.pth b/models/akb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb463bdd12731219e7911e2365e75b84b3fcee2c --- /dev/null +++ b/models/akb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754b1fe147cbf69765126955675812c100486c4e6869447134d063e277413ae9 +size 145481473 diff --git a/models/akb/config.json b/models/akb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/akb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/akb/vocab.txt b/models/akb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4c804c545e6ca6e7738a9cdad85f43358d73b231 --- /dev/null +++ b/models/akb/vocab.txt @@ -0,0 +1,34 @@ +4 +g +n +l +k +w +0 +r +j +1 + +s +i +5 +u +z +v +b +- +' +2 +h +p +e +d +t +m +y +_ +a +c +f +6 +o diff --git a/models/ake/G_100000.pth b/models/ake/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e497b24091ce988c2c30f5e78762456237f45146 --- /dev/null +++ b/models/ake/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3814d3f43eed90bd47d626bda5fa139aebddd3227193fb633ff941de4d596c6 +size 145479943 diff --git a/models/ake/config.json b/models/ake/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ake/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ake/vocab.txt b/models/ake/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b891524633a9322b1c586fa2d934558abdcaae8b --- /dev/null +++ b/models/ake/vocab.txt @@ -0,0 +1,32 @@ +– +y +w +1 +k +7 +ɨ + +r +0 +_ +p +n +8 +9 +t +u +' +s +i +3 +- +a +5 +6 +o +e +4 +ʉ +— +m +2 diff --git a/models/akp/G_100000.pth b/models/akp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc7b3c2768f2f0ab835bca42254c29d58ef6d985 --- /dev/null +++ b/models/akp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ce40d1208d2eb81230afe04f6a68d0984f2eeb15f8c9c174da0f0ee84895df +size 145482225 diff --git a/models/akp/config.json b/models/akp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/akp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/akp/vocab.txt b/models/akp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0628e29ad981378df423612d210d722ae1d36b23 --- /dev/null +++ b/models/akp/vocab.txt @@ -0,0 +1,35 @@ +t +' +ɖ +h +- +r +n +p +ɣ +a +l +v +ɛ +g +ã +o +z +m +i +d +w +f +ɔ +ĩ +s +u +6 +b + +̃ +ũ +e +y +k +_ diff --git a/models/alj/G_100000.pth b/models/alj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..96984f0e12de3e73dcea727acd0539d097ec4cb7 --- /dev/null +++ b/models/alj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1091bb729d753d0e547c3858a2feb790434f91340dfb8e68975e86fd6a8dfdd5 +size 145474549 diff --git a/models/alj/config.json b/models/alj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/alj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/alj/vocab.txt b/models/alj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d36ec1f02a68f4cd113cae00aa88d92d77233c61 --- /dev/null +++ b/models/alj/vocab.txt @@ -0,0 +1,25 @@ +a +| +n +i +o +g +y +k +s +t +m +p +e +b +w +l +d +r +0 +4 +1 +5 +3 +ɩ + diff --git a/models/alp/G_100000.pth b/models/alp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..450514f21eba8b06abb32ae24160c425b1b92ee0 --- /dev/null +++ b/models/alp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506cf520d2565645b437f69c74c9ab4b5d1acd3eb4de1bfb462c04194796b58a +size 145478385 diff --git a/models/alp/config.json b/models/alp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/alp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/alp/vocab.txt b/models/alp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f12d463b3d6c1f9a35aeb6548c973b70c835c5da --- /dev/null +++ b/models/alp/vocab.txt @@ -0,0 +1,30 @@ +j +- +y +1 +c +' +_ +r +e +t +u +b +g +w +l +i +o +2 +f +4 +s +h +n +m +k +a +0 + +p +d diff --git a/models/alt/G_100000.pth b/models/alt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f20c22dc42e361bfec6e98e31ef0c3f71de2e199 --- /dev/null +++ b/models/alt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e097c0c9d0fa0111d9a991b67c42cc16b16af32a559d604e591e4bc880795f0 +size 145490667 diff --git a/models/alt/config.json b/models/alt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/alt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/alt/vocab.txt b/models/alt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..60774c5cf6765a3326c036c4027166498a2d9e80 --- /dev/null +++ b/models/alt/vocab.txt @@ -0,0 +1,46 @@ +| +а +е +р +ы +н +л +и +к +д +о +т +у +г +б +с +й +п +ј +ӱ +ҥ +м +ч +з +ӧ +ж +ш +э +– +- +в +х +ф +я +ь +1 +2 +ц +ю +ÿ +4 +r +j +6 +0 + diff --git a/models/alz/G_100000.pth b/models/alz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..08f4e7e2c1cee46700bcae4379c633b79b3cc2d7 --- /dev/null +++ b/models/alz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7d4339436a25fd36402011d65fb777da9f640fec533d8e5f79b6e8f7a6a4c5 +size 145476068 diff --git a/models/alz/config.json b/models/alz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/alz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/alz/vocab.txt b/models/alz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ce9a6b346785f479f11553eca17391b47697d75c --- /dev/null +++ b/models/alz/vocab.txt @@ -0,0 +1,27 @@ +| +a +i +e +n +u +o +m +k +g +d +w +r +t +b +c +y +l +' +h +p +j +s +f +v +z + diff --git a/models/ame/G_100000.pth b/models/ame/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e04f288d9f3d37d2a5cfb46b8fd03abbe60186c --- /dev/null +++ b/models/ame/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60e18082dae54de6f904a6dfc0ba50f194936c090ae635b685d6280552ca8d9 +size 145485323 diff --git a/models/ame/config.json b/models/ame/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ame/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ame/vocab.txt b/models/ame/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d2cdc86492abb2ff429c3a907773efb4632a8c2 --- /dev/null +++ b/models/ame/vocab.txt @@ -0,0 +1,39 @@ +í +x +i +_ +b +v +y +w +q +n + +p +ë +u +j +ú +ñ +g +r +s +é +t +d +z +c +- +e +a +k +h +ó +o +á +f +— +2 +l +m +' diff --git a/models/amf/G_100000.pth b/models/amf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..63d07ebc0e0b154187ea6c13041e391359adffbe --- /dev/null +++ b/models/amf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e54ba5946eb8b571ab2d8fadabe6e24b6e3b1069345b60f59276a5a98ec1f69 +size 145477075 diff --git a/models/amf/config.json b/models/amf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/amf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/amf/vocab.txt b/models/amf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5f1d3e54a3120d2c6003570da9230c45a635bd0e --- /dev/null +++ b/models/amf/vocab.txt @@ -0,0 +1,28 @@ +z +k +v +a +n +u +x +l +j +p +w +g +h +t +_ +m +s +c +e +y +o + +i +q +r +' +d +b diff --git a/models/amh/G_100000.pth b/models/amh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba5811f7dd2ddb1f88110e829642dfac7c03585d --- /dev/null +++ b/models/amh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a33ef5e1df10612ebb8d3247a1584fd743b6ab33ed34101f5e76e906c96957d9 +size 145476821 diff --git a/models/amh/config.json b/models/amh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..993d1dedb1d0c8e820b98f9e2f019ff166327038 --- /dev/null +++ b/models/amh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.uroman", + "validation_files": "dev.uroman", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/amh/vocab.txt b/models/amh/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..59fcb3668654565a9b6ddde9b45e26d542af6e45 --- /dev/null +++ b/models/amh/vocab.txt @@ -0,0 +1,28 @@ +c +_ +l +f +p +e +m +j +r +h +o +z + +s +' +t +n +u +q +b +w +a +k +x +i +y +d +g diff --git a/models/ami/G_100000.pth b/models/ami/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0e709494232b48f7bfb95682f5eb0e6f0db9d90 --- /dev/null +++ b/models/ami/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff307370e2db5dc1ec19ab5c4226f805cef82d97761243ebdb2f9ab56fdad5a2 +size 145475333 diff --git a/models/ami/config.json b/models/ami/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ami/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ami/vocab.txt b/models/ami/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4ed52000213ebf3db387d5fa31ef405dac98319b --- /dev/null +++ b/models/ami/vocab.txt @@ -0,0 +1,26 @@ +a +| +o +n +i +k +t +m +y +s +r +l +w +c +p +g +d +' +h +e +f +u +x +0 +- + diff --git a/models/amk/G_100000.pth b/models/amk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..496f5ec05b06eeb6f1e58f58be709b0a1480df9f --- /dev/null +++ b/models/amk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad693bfdf6c4f1bfcffba74ca14ddc00d8f1cc16b4b25bea91b79575c077d68 +size 145482221 diff --git a/models/amk/config.json b/models/amk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/amk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/amk/vocab.txt b/models/amk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..498fec475f498ae2d7932d81028ceb6ae6afb89a --- /dev/null +++ b/models/amk/vocab.txt @@ -0,0 +1,35 @@ +h +z +e +o +2 +a +u +_ +- +j +' +g + +p +d +c +l +0 +t +w +3 +— +6 +y +k +f +m +i +1 +b +7 +n +r +4 +s diff --git a/models/ann/G_100000.pth b/models/ann/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..78d2a06d4c797024926d4d37ba6987647803c821 --- /dev/null +++ b/models/ann/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5de61022a2ba72a186712e35359fe13f89753c98cc6728459fc2d28939df78b9 +size 145486071 diff --git a/models/ann/config.json b/models/ann/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ann/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ann/vocab.txt b/models/ann/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a588477c29ffa1d306ed19e931821c36ef061c9a --- /dev/null +++ b/models/ann/vocab.txt @@ -0,0 +1,40 @@ +| +e +i +a +n +k +u +m +ọ +b +o +t +̄ +y +g +r +w +s +j +l +p +è +ì +h +c +f +ò +î +d +- +ô +ǹ +ê +â +̀ +à +ù +' +û + diff --git a/models/any/G_100000.pth b/models/any/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4a031d2cf2db78b1fc458fac799d39bad3aaee2 --- /dev/null +++ b/models/any/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb792f095b0813b58542fc063753b81d39b76328dead5b6d14043218f5cdcf9 +size 145485293 diff --git a/models/any/config.json b/models/any/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/any/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/any/vocab.txt b/models/any/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..251a714a2366b265af343d88bad4b52f4a942557 --- /dev/null +++ b/models/any/vocab.txt @@ -0,0 +1,39 @@ +ʋ +í +m +ó +a +t +u +ɛ +̂ +ú +c +ɔ +' +́ +á +_ +l +d +y +p +b +é +g +n +j +v +â +s +f +e + +k +ɩ +i +z +o +- +w +h diff --git a/models/aoz/G_100000.pth b/models/aoz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c7ebd572ea8facda8aa6ec52bbb0c65f51bb48c --- /dev/null +++ b/models/aoz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d998acd04527f6d583faa8c664a7f840b50a09343e02b1d9efe9d5c854e731d +size 145473765 diff --git a/models/aoz/config.json b/models/aoz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/aoz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/aoz/vocab.txt b/models/aoz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..422e66602b282d6637fb9c05dcd9ea13af1ad52d --- /dev/null +++ b/models/aoz/vocab.txt @@ -0,0 +1,24 @@ +| +a +n +i +e +s +u +o +m +l +t +k +' +h +f +b +g +p +y +j +- +r +d + diff --git a/models/apb/G_100000.pth b/models/apb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbe690f790c774bb77365a8e8793377a1a668f6a --- /dev/null +++ b/models/apb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4123ddc886fad816bd3fa675228d07224e1612f95fa2fd8303efd2294f6b8fd5 +size 145484533 diff --git a/models/apb/config.json b/models/apb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/apb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/apb/vocab.txt b/models/apb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e2b8659c6bcc80042b78d1aa9a3c4d061eb1be7d --- /dev/null +++ b/models/apb/vocab.txt @@ -0,0 +1,38 @@ +| +a +e +i +o +n +u +l +m +ꞌ +k +h +t +r +g +s +w +d +p +- +j +b +1 +v +2 +3 +f +5 +4 +6 +8 +9 +0 +7 +— +' +z + diff --git a/models/apr/G_100000.pth b/models/apr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fde6f80d3adbea20430f2f417a95161d7aa6e17c --- /dev/null +++ b/models/apr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5b063b32bfbd382419060071d139238ef29becf77aa38916f4c9654158fa1d +size 145479249 diff --git a/models/apr/config.json b/models/apr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/apr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/apr/vocab.txt b/models/apr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..94dd9d7305656ddaf39af1864fb0b10c63186dc6 --- /dev/null +++ b/models/apr/vocab.txt @@ -0,0 +1,31 @@ +3 +n +y +r +9 +d +l +k +4 + +2 +6 +' +s +_ +a +1 +i +t +p +o +7 +w +5 +u +b +g +8 +e +0 +m diff --git a/models/ara/G_100000.pth b/models/ara/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..645bf020c3edd7538cc48caaa6625ec1ce622fe1 --- /dev/null +++ b/models/ara/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e120ca88cff5fe478d5f72c354077e378684269c516a7d325f2fa953b0c6780 +size 145485191 diff --git a/models/ara/config.json b/models/ara/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ara/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ara/vocab.txt b/models/ara/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..d1be48458cd2285ede825d5c2fc1efe5245d7ec3 --- /dev/null +++ b/models/ara/vocab.txt @@ -0,0 +1,39 @@ +ا +ن +ك +ع +إ +غ +ذ +ة +س +ر +ط +خ +ت +ج +ظ +ي +د +– +ص +ث +أ +ى +ض +ح +ه + +ء +آ +ب +و +م +ل +ش +ق +ز +ؤ +ف +_ +ئ diff --git a/models/arl/G_100000.pth b/models/arl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dda1308265f7d5797e3387cfbc3ca1e4c649109d --- /dev/null +++ b/models/arl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de2a7663909eb44edc0b84023ddb60aa143b12113f19bb008b2758a750b0c49 +size 145481445 diff --git a/models/arl/config.json b/models/arl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/arl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/arl/vocab.txt b/models/arl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cbd23864925a1e9d45edb4708d97646031e02df3 --- /dev/null +++ b/models/arl/vocab.txt @@ -0,0 +1,34 @@ +d +i +n +x +o +v +s +' +h +é +r +e +y +b +g +t +k + +ó +_ +l +í +c +á +a +— +q +u +z +ú +f +j +p +m diff --git a/models/asa/G_100000.pth b/models/asa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6abe3d2fa0a70b2064fae919de9b1b819ad024b4 --- /dev/null +++ b/models/asa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1390660466c65e6ea4f677bcbe477b4f75de5e1531c9eb528852fff6a8fbbcf +size 145476859 diff --git a/models/asa/config.json b/models/asa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/asa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/asa/vocab.txt b/models/asa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..496448d2f597d7247eed9f59390e8ab8c03e85d1 --- /dev/null +++ b/models/asa/vocab.txt @@ -0,0 +1,28 @@ +z +m +w +_ +- +p +b +t +e +y + +i +f +a +c +u +n +o +l +v +r +g +j +s +k +d +h +' diff --git a/models/asg/G_100000.pth b/models/asg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ab5cf24aa264188acca9e91a086cdb65b758ebe --- /dev/null +++ b/models/asg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ae1edc8a49965b197da0eeae5a27fe49199fc445ddb0f1c81a92b5070007578 +size 145479911 diff --git a/models/asg/config.json b/models/asg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/asg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/asg/vocab.txt b/models/asg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e4b15fdec3f0b61d90e9b02d11b55c967309568e --- /dev/null +++ b/models/asg/vocab.txt @@ -0,0 +1,32 @@ +| +a +i +u +n +k +̱ +e +m +s +t +o +y +l +ɗ +b +g +v +d +h +w +p +c +ꞌ +z +r +f +ɓ +j +- +' + diff --git a/models/asm/G_100000.pth b/models/asm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..532827b6707be4961a844b7d6b75b278cb971954 --- /dev/null +++ b/models/asm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51cd0ff8cab466525f769ea005a71e47990cd6c5355c0f5e1f7d697edcf13dea +size 145506127 diff --git a/models/asm/config.json b/models/asm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/asm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/asm/vocab.txt b/models/asm/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..57faf47a6b7be879f615c4c89e59e2a2ae1a9240 --- /dev/null +++ b/models/asm/vocab.txt @@ -0,0 +1,66 @@ +| +ৰ +া +ক +ে +ি +ত +ন +্ +ল +ো +য +ব +প +ম +ু +স +আ +হ +় +ই +দ +ঁ +ও +জ +ী +শ +চ +ৈ +গ +ৱ +ছ +এ +ধ +থ +ষ +ভ +অ +ণ +খ +' +ট +র +ূ +ঈ +উ +ৃ +- +ঠ +ঘ +ফ +ং +ঞ +ড +ৌ +ৎ +ঙ +ঢ +ঃ +ঐ +ঋ +0 +2 +— +‍ + diff --git a/models/ata/G_100000.pth b/models/ata/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..381fa6b07d85f2993a20a60fa749660697a06355 --- /dev/null +++ b/models/ata/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7eab099a03e7a1f93139b0ff93da1ce3cf5ec6568be733d85eb86f98139d494 +size 145480705 diff --git a/models/ata/config.json b/models/ata/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ata/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ata/vocab.txt b/models/ata/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d8ea470aba46f88ec12765479c5b2e39768a0990 --- /dev/null +++ b/models/ata/vocab.txt @@ -0,0 +1,33 @@ +' +i +7 +u +v +_ +b +p +z +d +r +1 +f +– +o +h +g +t +0 + +a +k +- +6 +m +4 +l +2 +5 +e +n +s +x diff --git a/models/atb/G_100000.pth b/models/atb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebeed0c281cbd90a4cfcc1606a3ac7f342a6d793 --- /dev/null +++ b/models/atb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b64851b91a7c925354c7cc90d92a2845b0ab1ede004e22a4f229a96dc20a5fb +size 145493635 diff --git a/models/atb/config.json b/models/atb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/atb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/atb/vocab.txt b/models/atb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..acb63a02994a9540912a14ab0a2b3e1a2a6ccfdf --- /dev/null +++ b/models/atb/vocab.txt @@ -0,0 +1,50 @@ +_ +s +c +î +4 +z +ò +q +à +ù +- +0 +x +ê +o +u +' +ú +n +k +r +w +‐ +é +j +è +y +g +a +á +ô +i +b +h +m +e +t +8 +l +6 +í +p +ó +1 +ì +û + +d +â +2 diff --git a/models/atg/G_100000.pth b/models/atg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..456132a4fc0d973156069056dba3e4b98a6eb6aa --- /dev/null +++ b/models/atg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20716f09e9ffd8c5dd770b98923c0e2fdfc0c9595218ad0ab639be6df471e0f +size 145493843 diff --git a/models/atg/config.json b/models/atg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/atg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/atg/vocab.txt b/models/atg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b7f8244ca525f531afa8ae22f7a766380846fa1 --- /dev/null +++ b/models/atg/vocab.txt @@ -0,0 +1,50 @@ +g +l +y +b +h +v +1 +d +z +o +̀ +4 +_ +ú +t + +i +ó +0 +s +á +3 +ò +̌ +m +w +p +̄ +u +ọ +é +7 +ẹ +́ +k +a +n +- +ā +2 +è +f +à +6 +e +j +c +r +' +ù diff --git a/models/ati/G_100000.pth b/models/ati/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..423c251522ecaedcd087b390d73d0f2fdfd1de5f --- /dev/null +++ b/models/ati/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c97f0487d3c748e26fd977236ae265f6df9be572dcb021e8f27a9eb44fda0f2 +size 145479155 diff --git a/models/ati/config.json b/models/ati/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ati/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ati/vocab.txt b/models/ati/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7977571010ec658116138ab86298be0ff43b4b63 --- /dev/null +++ b/models/ati/vocab.txt @@ -0,0 +1,31 @@ +| +' +n +a +ɛ +i +k +e +b +u +ɔ +h +m +s +l +ö +z +y +o +p +ë +t +f +d +w +g +r +j +v +c + diff --git a/models/atq/G_100000.pth b/models/atq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d91e4fab2f5686ba53240f970079fc99caffc2cc --- /dev/null +++ b/models/atq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe63adf4245a0e6720608a407c1a001512d33138beeb0e9677ed95b09570c51d +size 145476868 diff --git a/models/atq/config.json b/models/atq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/atq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/atq/vocab.txt b/models/atq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1717dc76394e3258d064db34d4c53f84ee4c990d --- /dev/null +++ b/models/atq/vocab.txt @@ -0,0 +1,28 @@ +l +w +o +n +a +' +r +j +b +d +m +f +p +c +e +- +k +u +i +s +_ +y + +g +h +ä +z +t diff --git a/models/ava/G_100000.pth b/models/ava/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..679a0ca374e2392d3225c12861ff464d2e42704c --- /dev/null +++ b/models/ava/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17da26e418fb8d4bdbf936716a8b2bd6df7b996c5a87996a458fb37b52632b70 +size 145483776 diff --git a/models/ava/config.json b/models/ava/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ava/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ava/vocab.txt b/models/ava/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c2e20ffd8cb6856b8fa3cea6e62625fd286af853 --- /dev/null +++ b/models/ava/vocab.txt @@ -0,0 +1,37 @@ +о +н +п +х +- +з +ъ +э +ж +ю +ӏ +м +л +д +я +б +г +с +у +ь +_ +е +– +й +в +ч +ф +щ +ё +т +и +к +ш + +а +р +ц diff --git a/models/avn/G_100000.pth b/models/avn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ad4434f542f566918bab613a4f6deb399b87abc --- /dev/null +++ b/models/avn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1c510789b19a2c8e5bad036397163af24417e0f2fc9b905e37fd6b457f74fca +size 145492973 diff --git a/models/avn/config.json b/models/avn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/avn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/avn/vocab.txt b/models/avn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..75cc584b7fc802a5bdc9fee65a1adf0fffae0d77 --- /dev/null +++ b/models/avn/vocab.txt @@ -0,0 +1,49 @@ +x +d +i +í +ʋ +s +á +y +̀ +w +o +ũ +k +' +ɛ +é +u +r +ƒ +e +ò +ú +m +t +n +h +ì +ù +è +ɖ +ŋ +ɔ +z +g +́ +à +c +_ +v +l +p +f +ĩ +b +ó +ã + +a +̃ diff --git a/models/avu/G_100000.pth b/models/avu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..75555e760b205c0990f32625301d48f809421480 --- /dev/null +++ b/models/avu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a240ecfd4ec05dfc98261b003db2bcea84805ee6bfb9db66a9dbb53fcef11b0c +size 145499103 diff --git a/models/avu/config.json b/models/avu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/avu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/avu/vocab.txt b/models/avu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ba780cad86f4280332f47d8403796e69d11548c2 --- /dev/null +++ b/models/avu/vocab.txt @@ -0,0 +1,57 @@ +| +r +ʼ +á +ã +l +â +d +î +ĩ +í +n +t +ó +õ +g +y +b +k +ị +é +ô +s +m +̃ +z +́ +ẽ +j +v +ê +ạ +ú +ũ +p +̂ +- +f +û +c +ậ +w +ṇ +' +1 +2 +h +4 +3 +5 +8 +6 +9 +0 +7 +i + diff --git a/models/awa/G_100000.pth b/models/awa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2cc098bc443a3726c87e9d0000f5c1670bef52f --- /dev/null +++ b/models/awa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0167a20ff70be300fed2033aab431fac48880e36be27985ed64e201d29f0b36 +size 145515345 diff --git a/models/awa/config.json b/models/awa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/awa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/awa/vocab.txt b/models/awa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8a41754d812d6fbc79f2bcf79141b0a5dc9f03df --- /dev/null +++ b/models/awa/vocab.txt @@ -0,0 +1,78 @@ +| +ा +क +र +ह +न +स +े +त +म +ि +इ +उ +ब +प +् +ी +अ +ज +ल +ो +ँ +व +द +य +ू +ु +ग +च +ओ +ं +आ +भ +ख +ए +ई +ट +ध +छ +ड +़ +फ +थ +ठ +ण +ौ +घ +ढ +झ +ै +ऊ +ऍ +- +ृ +ऩ +ॅ +ञ +ः +ॉ +' +0 +ष +ऐ +श +1 +2 +औ +ऋ +6 +4 +9 +ऎ +7 +ऱ +i +3 +5 + diff --git a/models/awb/G_100000.pth b/models/awb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..07eb7c81f1c8d9600c14d857399a947b69d1bb61 --- /dev/null +++ b/models/awb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5004db8b6c7eeadfff241b4c1d2a480aa1fdb61b14c4a1ea2b57d2574208de12 +size 145482215 diff --git a/models/awb/config.json b/models/awb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/awb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/awb/vocab.txt b/models/awb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ebab784d722469643f9e56902497b1e272e4017d --- /dev/null +++ b/models/awb/vocab.txt @@ -0,0 +1,35 @@ +| +a +e +n +r +h +i +á +w +é +í +m +k +t +s +q +o +b +u +g +p +ó +ú +y +0 +1 +2 +4 +5 +3 +7 +6 +8 +9 + diff --git a/models/ayo/G_100000.pth b/models/ayo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7ad5688fcf2611d96071c3756fbc40c569afc571 --- /dev/null +++ b/models/ayo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:630c0bdf16249c476410cb5b69064c6f9b7a0a6670ad38e34f0997ee4bf272cf +size 145499097 diff --git a/models/ayo/config.json b/models/ayo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ayo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ayo/vocab.txt b/models/ayo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6f8e7ef9165ebaf84590c95dcb3f98cccf625c74 --- /dev/null +++ b/models/ayo/vocab.txt @@ -0,0 +1,57 @@ +0 +c +z +ñ +j + +ẽ +v +a +ṍ +q +4 +ó +­ +2 +_ +̃ +õ +7 +y +6 +o +í +ã +b +- +t +s +ú +́ +– +u +1 +g +r +d +3 +ṹ +5 +x +9 +p +l +k +m +n +e +ũ +8 +h +ı +é +á +i +f +ĩ +' diff --git a/models/ayr/G_100000.pth b/models/ayr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a5683cdbaf61320de3c8035c8b6d873d855f0df --- /dev/null +++ b/models/ayr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a57f6db2063254101186b4cec23f9c1409332146414442a58e107aa76d7869ad +size 145486941 diff --git a/models/ayr/config.json b/models/ayr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ayr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ayr/vocab.txt b/models/ayr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24edecfac0881010517777e4720e0545e19ee73a --- /dev/null +++ b/models/ayr/vocab.txt @@ -0,0 +1,41 @@ +a +| +i +t +k +n +u +s +p +r +m +j +x +h +w +y +q +c +l +ä +ñ +' +o +e +d +ï +— +b +g +f +ü +v +í +é +ó +z +á +ö +ú +ë + diff --git a/models/ayz/G_100000.pth b/models/ayz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c54ec42b6b08e7823c1b2664a606f16ed0a9a74 --- /dev/null +++ b/models/ayz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f7a0135dfc999775074c945830200d5c70dae19bdd1cafc9e12ee1e2c3cfed +size 145486073 diff --git a/models/ayz/config.json b/models/ayz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ayz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ayz/vocab.txt b/models/ayz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cee9ccd90bbfc03f6d8fc62c057fe618bc0bfbf2 --- /dev/null +++ b/models/ayz/vocab.txt @@ -0,0 +1,40 @@ +2 +a +f +9 +r +u +_ +m +d +5 +j +s +4 +7 +e +o +b + +p +6 +n +' +t +ʼ +y +i +l +z +- +g +c +h +0 +õ +v +1 +w +8 +3 +k diff --git a/models/azb/G_100000.pth b/models/azb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d55150960bf11d0bbe9d7d50beef74a07fcf52f --- /dev/null +++ b/models/azb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8af9631552cdc695927e2f9da733eff672c19a7223b1438fceb26b28e499483 +size 145485397 diff --git a/models/azb/config.json b/models/azb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/azb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/azb/vocab.txt b/models/azb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..992cad17a241a9c5adf30b3b3194a20f0af4d72d --- /dev/null +++ b/models/azb/vocab.txt @@ -0,0 +1,39 @@ +ن +ت +- +ص +پ +ث +ه +گ +ح +ا +آ +ک +ع +أ +خ +س +م +ؤ +ل +و +ئ +ی +_ +ر +ژ +غ +ج +د +ظ +ذ +ب +ق +چ + +ط +ض +ش +ز +ف diff --git a/models/azg/G_100000.pth b/models/azg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..961d456e6c12ee021aede5fd262d50141c06fa99 --- /dev/null +++ b/models/azg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7c5c145e697cc459a16c69f213e0bf7bc26e7069c2e6e5284a0826e7917e7b +size 145485181 diff --git a/models/azg/config.json b/models/azg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/azg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/azg/vocab.txt b/models/azg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ed7f26052fe01dcf299a4b71e636b1602c9c6e68 --- /dev/null +++ b/models/azg/vocab.txt @@ -0,0 +1,39 @@ +ú +k +í +' +n +v +l +ö +_ +z +t +o +x +ñ +y +q +ü +s +ë +́ +— +é +a +u +e +g +m +j +c +ó +á +i +b +f + +p +r +h +d diff --git a/models/azj-script_cyrillic/G_100000.pth b/models/azj-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ccf4b9271ff313b9ebc1e51da693492a2ea501a2 --- /dev/null +++ b/models/azj-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:915b78eaac92dd219e2c7632029e25ba6527c4823ae3497a74cf7c1981acc1fa +size 145483859 diff --git a/models/azj-script_cyrillic/config.json b/models/azj-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/azj-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/azj-script_cyrillic/vocab.txt b/models/azj-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f2906cfe66d9f137a7c4de45b17925c07ef719de --- /dev/null +++ b/models/azj-script_cyrillic/vocab.txt @@ -0,0 +1,37 @@ +| +а +ж +и +н +р +л +д +э +м +с +ј +з +б +у +т +е +ь +о +к +г +һ +ю +в +ц +ҝ +х +ҹ +п +ф +- +– +0 +1 +2 +4 + diff --git a/models/azj-script_latin/G_100000.pth b/models/azj-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..06390ce1ac0d84a384d0a01f29bb9b556bffe9bb --- /dev/null +++ b/models/azj-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f07e7ecc38209461f828c837ee260e2ec757f5ee869180f24be9a21caaaf3b +size 145487493 diff --git a/models/azj-script_latin/config.json b/models/azj-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/azj-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/azj-script_latin/vocab.txt b/models/azj-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0d31a4b2b2bbab44e0f0ce23c7fbc2957d210afd --- /dev/null +++ b/models/azj-script_latin/vocab.txt @@ -0,0 +1,42 @@ +1 +t +6 +s +ç + +ğ +_ +e +2 +ü +x +m +n +d +v +b +q +z +k +ö +0 +p +– +g +j +a +i +u +ş +c +l +ı +4 +̇ +o +ə +h +f +- +r +y diff --git a/models/azz/G_100000.pth b/models/azz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fd90517ecaf69fd914168e30c5c860e810407d4 --- /dev/null +++ b/models/azz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17ff0188fee6a8674023fa1f0e2c8d5e5f76389691e408e8b796840230c8e976 +size 145480683 diff --git a/models/azz/config.json b/models/azz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/azz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/azz/vocab.txt b/models/azz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1a6fb321503f47ce25858cae69c3010688569d29 --- /dev/null +++ b/models/azz/vocab.txt @@ -0,0 +1,33 @@ +| +a +i +n +u +e +t +c +o +h +j +m +s +l +q +y +p +d +z +í +r +x +ó +b +g +ú +á +é +f +v +ñ +k + diff --git a/models/bak/G_100000.pth b/models/bak/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..18830d78382ff259216f23b6fa2041f8c2b00c91 --- /dev/null +++ b/models/bak/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9769a2aaf9ef76e2df8a589819e9bdff6032cddc072716330f26709ecab357 +size 145491439 diff --git a/models/bak/config.json b/models/bak/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bak/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bak/vocab.txt b/models/bak/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c8d5b03d6d7114ce28a4a471cd5614949df06966 --- /dev/null +++ b/models/bak/vocab.txt @@ -0,0 +1,47 @@ +ш +з +м +ц +ж +е +ь +й +я +_ +- +в +ю +һ +ч +ъ +ү +a +ҡ +– +т +э +ф +ы +л +ң +и +г +б +о +ҫ +1 +8 +у +п +х +ғ +д +с + +н +ө +р +ә +к +а +ҙ diff --git a/models/bam/G_100000.pth b/models/bam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..32319e6bd6ef327f4d7a6cd8c17a2c03c034addc --- /dev/null +++ b/models/bam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af57c3ad4c77759016835542149dae3933829e5feec3d119ee12944a1be49146 +size 145483005 diff --git a/models/bam/config.json b/models/bam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bam/vocab.txt b/models/bam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b9477f320c02099b126d5e2258fdaaec0afb2111 --- /dev/null +++ b/models/bam/vocab.txt @@ -0,0 +1,36 @@ +| +a +n +i +k +ɛ +e +ɔ +l +u +o +y +m +b +s +w +t +r +d +g +f +j +' +ɲ +c +h +p +z +- +v +ŋ +– +q +1 +8 + diff --git a/models/ban/G_100000.pth b/models/ban/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b72cb11eead96be8926b933611949695a744383 --- /dev/null +++ b/models/ban/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2acea9eee47171ebea5c29a9ecca35fb7a54fdf115709814f194f0532b370e7b +size 145474561 diff --git a/models/ban/config.json b/models/ban/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ban/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ban/vocab.txt b/models/ban/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f90af2db3510a7cf88a06dbfe168d1996684266e --- /dev/null +++ b/models/ban/vocab.txt @@ -0,0 +1,25 @@ +a +| +n +i +e +g +s +u +k +t +r +p +d +m +h +l +y +o +w +b +j +c +- +' + diff --git a/models/bao/G_100000.pth b/models/bao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ecb29682f56c5ea661d04e3fb31e264a7a864d81 --- /dev/null +++ b/models/bao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b505fed4ab7a3103b4bc4602e80b186d812c8183f8f50289dd37e7c8aa52eb2 +size 145489899 diff --git a/models/bao/config.json b/models/bao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bao/vocab.txt b/models/bao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..00b3bceb9ca76b28919b2492560b3f833f392d7b --- /dev/null +++ b/models/bao/vocab.txt @@ -0,0 +1,45 @@ +| +a +i +r +e +c +ʉ +o +j +t +m +u +p +n +ã +b +̃ +w +ĩ +ñ +y +d +g +s +q +õ +é +— +l +í +ũ +ú +á +' +ẽ +f +h +ó +v +z +1 +x +k +́ + diff --git a/models/bav/G_100000.pth b/models/bav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..86dfdb40133a5a340256f4fc26dcf96ff8174912 --- /dev/null +++ b/models/bav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc97212dda1ab7c59c2a00060e91fb69c6e0642be1432d127d6b01f7734e2c0 +size 145486062 diff --git a/models/bav/config.json b/models/bav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bav/vocab.txt b/models/bav/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..176efe1949b4d38cf4f12d918e9aabe78a08e67d --- /dev/null +++ b/models/bav/vocab.txt @@ -0,0 +1,40 @@ +| +ə +a +ŋ +i +n +ɨ +w +s +u +v +e +k +t +m +y +ɔ +f +' +l +h +b +g +́ +d +o +ù +̀ +z +j +ì +à +r +- +è +p +ò +ú +̰ + diff --git a/models/bba/G_100000.pth b/models/bba/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd751160ce596980efc9d73d66f246ba6f499f9e --- /dev/null +++ b/models/bba/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e05281641c8ea6fa9ffd8fe72d464d0aa460e6a0ed2a44ed1c7d98eac2d1e9a +size 145482247 diff --git a/models/bba/config.json b/models/bba/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bba/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bba/vocab.txt b/models/bba/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6cde200b41872db9921bb92faffb1b3efbae3bbf --- /dev/null +++ b/models/bba/vocab.txt @@ -0,0 +1,35 @@ +| +a +u +n +i +ɔ +s +k +b +m +r +e +ɛ +o +w +y +g +d +t +̃ +p +ã +l +ǹ +ĩ +ù +h +f +̀ +à +è +ì +ũ +ò + diff --git a/models/bbb/G_100000.pth b/models/bbb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..33607a2e54f2f2c8520521e76c5cf0589b107288 --- /dev/null +++ b/models/bbb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69ca80154807981cb0a66a28a4b03eabdac96a30557996f27dc572991be542be +size 145472265 diff --git a/models/bbb/config.json b/models/bbb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bbb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bbb/vocab.txt b/models/bbb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..acb9ba546dced3e0783c6b58cd416c9643df04bf --- /dev/null +++ b/models/bbb/vocab.txt @@ -0,0 +1,22 @@ +| +e +a +i +u +o +n +r +j +k +f +m +v +b +g +d +s +t +z +- +l + diff --git a/models/bbc/G_100000.pth b/models/bbc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f2f88dd90b74e6bca7336f80db1fc6e7477fd44 --- /dev/null +++ b/models/bbc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2d7ad98fab0320a79d6eb96f125c237d6c1cb39101186f0c16c722399c867ef +size 145477627 diff --git a/models/bbc/config.json b/models/bbc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bbc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bbc/vocab.txt b/models/bbc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6c1e943ad709c5c8e9f5021acfa0392796494faf --- /dev/null +++ b/models/bbc/vocab.txt @@ -0,0 +1,29 @@ +e +m +f +k +y +w +a +l +t +i +b +o + +h +v +d +u +g +c +j +z +_ +— +é +- +r +n +p +s diff --git a/models/bbo/G_100000.pth b/models/bbo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fbc026ee17f47beda89e77463645731771f74bd --- /dev/null +++ b/models/bbo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351dce2b5551f7d446b65f563e87d184f8bbbbe5d3aee2b6bc8e46f485497c5c +size 145482237 diff --git a/models/bbo/config.json b/models/bbo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bbo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bbo/vocab.txt b/models/bbo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f7cea5c2b57c2798dd95fe44cace57614a5de4e3 --- /dev/null +++ b/models/bbo/vocab.txt @@ -0,0 +1,35 @@ +ɔ +_ +s +f +g +á +ɩ +ɲ +— +̣ +ʋ +u +n +k +m +h +' +y +e +i +ɛ +w +- +j +t +b +p +o + +a +r +d +l +́ +ŋ diff --git a/models/bcc-script_arabic/G_100000.pth b/models/bcc-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1da45724021eb743fd75b209c7325f2630abf3b --- /dev/null +++ b/models/bcc-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c6ef085c78d465652abe6b517298b1c5308098417b765f15a32b9ccddda926 +size 145479301 diff --git a/models/bcc-script_arabic/config.json b/models/bcc-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bcc-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bcc-script_arabic/vocab.txt b/models/bcc-script_arabic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f53dda6abe5958c789d289a9ad2b0ec998a4dd10 --- /dev/null +++ b/models/bcc-script_arabic/vocab.txt @@ -0,0 +1,31 @@ +پ +م +ٹ +ی +ت +ێ +ے +چ +ڈ +ا +ن +ش +آ +ب +ۆ +ه +د +ڑ +ک +ز +ل +ژ +و +س +_ + +گ +ج +ر +ئ +- diff --git a/models/bcc-script_latin/G_100000.pth b/models/bcc-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd7361c43a2d27224c492baf580b30b49fa30ff --- /dev/null +++ b/models/bcc-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb3e31eae25b3dd2ea17201f017b708341d183e20d9a51940108a6a60d96cef +size 145477623 diff --git a/models/bcc-script_latin/config.json b/models/bcc-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/bcc-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bcc-script_latin/vocab.txt b/models/bcc-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4bc85c22bb9aeb92b5a5820fb51c07fc0f16aec --- /dev/null +++ b/models/bcc-script_latin/vocab.txt @@ -0,0 +1,29 @@ +| +a +á +n +h +t +r +s +o +k +i +m +e +d +g +é +y +b +w +p +l +c +ó +z +j +u +' +- + diff --git a/models/bcl/G_100000.pth b/models/bcl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e031c8bb1ccfc32be16f6f5952895505753c917 --- /dev/null +++ b/models/bcl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0258035c9d6bc7187faf0ea0161a88eda1ac54e63b4ecb500d6ce1011795985b +size 145479151 diff --git a/models/bcl/config.json b/models/bcl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bcl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bcl/vocab.txt b/models/bcl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b97a3d77a0e91b1d4a2954f63e6ad97d86b46910 --- /dev/null +++ b/models/bcl/vocab.txt @@ -0,0 +1,31 @@ +a +| +n +i +g +o +s +k +m +d +t +u +l +p +b +r +y +h +e +w +j +- +c +f +— +z +v +' +q +x + diff --git a/models/bcw/G_100000.pth b/models/bcw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b4e2a4e89fa215f27fdbd8ac563fb864b9e25416 --- /dev/null +++ b/models/bcw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb239c8552a415eccbd3d7b0d040f281f15f8b40c9f5b6635336f7442d21d41 +size 145477619 diff --git a/models/bcw/config.json b/models/bcw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bcw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bcw/vocab.txt b/models/bcw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2f768f581d3f2586892e881623bfb5700b0d2f43 --- /dev/null +++ b/models/bcw/vocab.txt @@ -0,0 +1,29 @@ +| +a +ə +i +h +n +m +s +w +k +t +y +g +z +l +b +d +v +r +e +' +ŋ +p +c +ɓ +f +ɗ +j + diff --git a/models/bdg/G_100000.pth b/models/bdg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e93b4f6521657138bc12b99bfd6a2c0caf708c1 --- /dev/null +++ b/models/bdg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c70aae1710b7c2df92d55b2cc98041560aafa848aed8583c712481433e860093 +size 145473891 diff --git a/models/bdg/config.json b/models/bdg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bdg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bdg/vocab.txt b/models/bdg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6ad7ad0e1bc2ae49d18df17ac27322e611ef32eb --- /dev/null +++ b/models/bdg/vocab.txt @@ -0,0 +1,24 @@ +a +m +i +u +e +y +k +' +w +_ +p +n +l +h +g +r +d +s +t + +b +j +- +o diff --git a/models/bdh/G_100000.pth b/models/bdh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c81515191e4a108d9759c8a0d7dbf3fc55850c34 --- /dev/null +++ b/models/bdh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78604e8deaa51f5af336b658dca08b5443dc49baab7b57c412b07fa68c2afd3f +size 145485287 diff --git a/models/bdh/config.json b/models/bdh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bdh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bdh/vocab.txt b/models/bdh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24f716a9580c8d84042171ed4dfbf5166751fac2 --- /dev/null +++ b/models/bdh/vocab.txt @@ -0,0 +1,39 @@ +ṿ +ú +b +a +d +_ +ṛ +w +ị +c +r + +é +ṇ +ɨ +f +s +v +' +á +o +m +ụ +k +h +í +́ +ó +l +ꞌ +n +i +y +p +t +u +g +e +z diff --git a/models/bdq/G_100000.pth b/models/bdq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d47f4f1e2c069b38a96f1f2107226c62c5172c4 --- /dev/null +++ b/models/bdq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f34afaac7546878d91b871c17b65a9e4562faea7bc5fdc9cda80d15dc29a217 +size 145483761 diff --git a/models/bdq/config.json b/models/bdq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bdq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bdq/vocab.txt b/models/bdq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..552f5be666785a4d8f5ce40cf07bf12493ae9766 --- /dev/null +++ b/models/bdq/vocab.txt @@ -0,0 +1,37 @@ +| +ơ +n +h +i +k +a +g +t +m +o +l +ă +b +e +u +s +p +r +̆ +ư +d +' +ô +đ +ĕ +y +- +ê +j +c +ŏ +w +â +ŭ +ĭ + diff --git a/models/bdu/G_100000.pth b/models/bdu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f496a589d5764dd8c51d6efbfca5c7ed437a58d0 --- /dev/null +++ b/models/bdu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155b9e4ec1fbf774f049223abfbea7ca855afdde89e9cdab98c5f71894455316 +size 145489931 diff --git a/models/bdu/config.json b/models/bdu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bdu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bdu/vocab.txt b/models/bdu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..26b13e27690f23343ac01f1f52a05b7ebf3d174f --- /dev/null +++ b/models/bdu/vocab.txt @@ -0,0 +1,45 @@ +| +a +o +n +b +i +ɗ +ɛ +e +m +k +s +ɔ +t +u +y +g +j +w +' +f +d +- +ŋ +r +p +h +c +l +0 +1 +7 +v +2 +4 +z +6 +8 +9 +5 +– +3 +ó +x + diff --git a/models/bdv/G_100000.pth b/models/bdv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7491f1c720f1f3f67adbcd12adfe3c343937478c --- /dev/null +++ b/models/bdv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70821ad4f56419c4eb4b54dca5c6a72cc4b27940fc5e99641986098e4b4db1af +size 145485443 diff --git a/models/bdv/config.json b/models/bdv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bdv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bdv/vocab.txt b/models/bdv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5e42288049a17155fbd1c157e09d6b1479e68f3f --- /dev/null +++ b/models/bdv/vocab.txt @@ -0,0 +1,39 @@ +' +ଞ +ତ +ବ +ସ +ୱ +‍ +ଂ +ଚ +େ +ଜ +ଟ +ନ +ଡ +ହ +ଦ +ଏ +ଙ +୍ +ଗ +କ +ଅ +ଉ +ର +ଇ +ା +ଃ +ଆ +ୁ +ଣ +ଳ +ମ +_ +ପ +ି +ଁ + +ୟ +ଲ diff --git a/models/beh/G_100000.pth b/models/beh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..39260f88b1f5eadc5c66d74571d911a3a896fe7a --- /dev/null +++ b/models/beh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b49d319f3e2c16c295d92920e6e4e9d4cd11dd99322bfaf33b16cc0a59ad083f +size 145479250 diff --git a/models/beh/config.json b/models/beh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/beh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/beh/vocab.txt b/models/beh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2a6b9e78179ae8e017d16ddc88915441331d358b --- /dev/null +++ b/models/beh/vocab.txt @@ -0,0 +1,31 @@ +f +á +s +é +u +m +ó +k +- +l +i +r +w +d +h + +_ +y +g +t +́ +o +e +c +a +í +p +ǝ +ú +n +b diff --git a/models/bem/G_100000.pth b/models/bem/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9bf3fe60528f19a8f38a6f308ca3480f6fadf55e --- /dev/null +++ b/models/bem/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:047a33eddde174df8669fc1896956fe7c6e77945295fe3c89f631dcbed1a870a +size 145480800 diff --git a/models/bem/config.json b/models/bem/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bem/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bem/vocab.txt b/models/bem/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3d8a1c1385037460ca84f6d17556813210955719 --- /dev/null +++ b/models/bem/vocab.txt @@ -0,0 +1,33 @@ +| +a +i +u +e +l +n +k +m +b +o +w +s +t +y +p +h +f +c +g +d +r +' +ū +ā +ē +ī +ō +ŋ +j +- +– + diff --git a/models/ben/G_100000.pth b/models/ben/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4c69f1ca7edc83ffe7aee0b10e034a4606923df --- /dev/null +++ b/models/ben/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c098eab2e5e378fc52bec57683839bbc641b2241033dab17174f6e37db29a4 +size 145512166 diff --git a/models/ben/config.json b/models/ben/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ben/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ben/vocab.txt b/models/ben/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..a60ec003506c95d6bc81b05882fb1a12bc4d49cb --- /dev/null +++ b/models/ben/vocab.txt @@ -0,0 +1,74 @@ +6 +এ +ৃ +5 +ু +ঞ +প +ঘ +8 +ক +ল +9 +ি +ঃ +থ +1 +ভ +দ +ী +_ +স +ড +ঢ +ছ +ই +ং +ঁ +আ +ব +হ +' +2 +— +ঈ +ঊ +ঐ +্ +ে +য +খ +ূ +ম +ত +ঝ +ৌ +4 +ষ +জ +শ +ঔ +অ +ণ +ৎ +7 +ও +ট +ৈ + +ধ +া +ন +0 +3 +ঋ +ফ +চ +উ +- +় +র +ঙ +গ +ো +ঠ diff --git a/models/bep/G_100000.pth b/models/bep/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1ba82e16ab9609dac86e89490ba8e5e20d76efa --- /dev/null +++ b/models/bep/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031a094d0fb040842d9b8a484931d0c6183945d5b5e0a9650fe192052f558dba +size 145476077 diff --git a/models/bep/config.json b/models/bep/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bep/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bep/vocab.txt b/models/bep/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5193c5951f77a0bfce7f703f5116cdbb7dbb4902 --- /dev/null +++ b/models/bep/vocab.txt @@ -0,0 +1,27 @@ +á +o +j +' + +i +u +d +n +_ +k +ꞌ +g +b +r +- +a +t +w +p +l +c +s +h +m +e +ó diff --git a/models/bex/G_100000.pth b/models/bex/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..47666ad041d2d5f1c9f870fdc0c11868b75d7b82 --- /dev/null +++ b/models/bex/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17410376cbe20fa52434fc3be16258687b15cbfa919b9454a9246d2dd42e4e1c +size 145480711 diff --git a/models/bex/config.json b/models/bex/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bex/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bex/vocab.txt b/models/bex/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7367f86d31046b233fea20a884a73e209909679f --- /dev/null +++ b/models/bex/vocab.txt @@ -0,0 +1,33 @@ +| +a +i +k +ɔ +e +n +d +m +b +o +ï +t +' +l +y +g +ö +r +ë +u +z +ŋ +p +s +j +w +c +h +3 +8 +2 + diff --git a/models/bfa/G_100000.pth b/models/bfa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f0e9bc41814a5fc311150336aef27d7d0ced2b4 --- /dev/null +++ b/models/bfa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906284c19451101244e58214cfc4c81113e84154baa42da6b3170828c8b93387 +size 145476857 diff --git a/models/bfa/config.json b/models/bfa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bfa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bfa/vocab.txt b/models/bfa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2e83f52ee806876d9c77d51fa30de75eec47a9d8 --- /dev/null +++ b/models/bfa/vocab.txt @@ -0,0 +1,28 @@ +' +h +r +o +y +a +— +i +g +m +_ +l +s + +n +u +k +j +ŋ +ö +- +d +p +e +w +t +é +b diff --git a/models/bfo/G_100000.pth b/models/bfo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4a6864688f09deb4d206fa027f8ac61a0b2e768 --- /dev/null +++ b/models/bfo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:398e1ad2e47ddd49911bf4cb8a3fabf516e029ec91ce8df16c71915beb38ec84 +size 145493002 diff --git a/models/bfo/config.json b/models/bfo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bfo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bfo/vocab.txt b/models/bfo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8666891ac42fce54c07f6f0624698a77a0634130 --- /dev/null +++ b/models/bfo/vocab.txt @@ -0,0 +1,49 @@ +g +j +ẽ +s +ʻ +ꞌ +u +ʋ +w +õ +k +v +d +m +́ +ú +r +1 +o +p +t +n +f +— +ũ +é +c +_ +ɛ +- + +i +ʼ +ŋ +e +h +á +y +ĩ +̃ +ɩ +ↄ +ƴ +ã +a +b +l +ɓ +ı diff --git a/models/bfy/G_100000.pth b/models/bfy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e495fd44b46527172279220ac16daad1a5d7990b --- /dev/null +++ b/models/bfy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a971d017dc138407c60670418247e61cda4ad9aec31db2a062cb8dfa839389fa +size 145496811 diff --git a/models/bfy/config.json b/models/bfy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bfy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bfy/vocab.txt b/models/bfy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50736a716227c257cde28624a7e744c3ac1593f5 --- /dev/null +++ b/models/bfy/vocab.txt @@ -0,0 +1,54 @@ +़ +ौ +छ +द +व +स +ह +1 +ू +ि +ट +' +े +य +_ +क +अ +आ +उ +ै +ध +ल + +ब +प +फ +ढ +ड +ई +च +झ +म +ओ +ु +घ +भ +ा +ं +ग +ख +ी +- +ऊ +इ +र +थ +ो +त +न +ँ +ज +् +ठ +ए diff --git a/models/bfz/G_100000.pth b/models/bfz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c71d42108d934df820e60d6ae559fd04d37042d1 --- /dev/null +++ b/models/bfz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6360b45532a90705d574c013e9440bcc690c467967f2936c9124a7fe3b8b9555 +size 145503735 diff --git a/models/bfz/config.json b/models/bfz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bfz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bfz/vocab.txt b/models/bfz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0038e1f4bfd969efcd35bfbd3161f6b87058aac9 --- /dev/null +++ b/models/bfz/vocab.txt @@ -0,0 +1,63 @@ +अ +- +भ +ु +ठ +त +र +़ +आ +8 +ी +प +् +‍ +ग +ू +ढ +फ +ट +ै +च +य +छ +ँ +ं +ए +घ +द +स +म +इ +_ +औ +क +ई +उ +ऊ +ध +थ + +ञ +ा +झ +ल +ब +ड +ो +ह +4 +ण +श +ष +ौ +ः +ि +े +न +ख +व +ज +ृ +ऐ +ओ diff --git a/models/bgc/G_100000.pth b/models/bgc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a19e33c2f1f486a13bbc8d6239de29e83c4b3c61 --- /dev/null +++ b/models/bgc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea31a1bf72bf6af62c33bda4edab178d293c699436aa3880d0438b672993f1e7 +size 145503711 diff --git a/models/bgc/config.json b/models/bgc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bgc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bgc/vocab.txt b/models/bgc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b9f1dc3b3578975bd6518b50f917008fad4ab018 --- /dev/null +++ b/models/bgc/vocab.txt @@ -0,0 +1,63 @@ +े +ै +र +औ +श +ढ +ृ +ख +ठ +ह +ि +फ +न +प +व +झ +ू +च +ग +ँ +थ +ए +ा +द +ओ +उ +् +ऐ +_ +घ +अ +आ +- +य +' +ं +ब +ु +ज + +ो +ल +इ +ण +ष +म +‍ +ऊ +ी +भ +स +़ +ध +ौ +ञ +ऋ +ट +ड +छ +क +ई +ळ +त diff --git a/models/bgq/G_100000.pth b/models/bgq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..95296ce89d9bf271570c702fc425e2cdc0aed362 --- /dev/null +++ b/models/bgq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0866f784d57d48c462aff4463338038885a2da291fce66a33e96b5b51ffa5195 +size 145502965 diff --git a/models/bgq/config.json b/models/bgq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bgq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bgq/vocab.txt b/models/bgq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4a883336f9c0a60537fa2abd3e8f3aec92805131 --- /dev/null +++ b/models/bgq/vocab.txt @@ -0,0 +1,62 @@ +उ +ए +झ +ू +ठ +ी +ण +ड +म +न +य +अ +इ +6 +् +2 +ह +ं +ज +आ +ढ +0 +प +भ +ओ +व +ऊ +_ +क +ख +' +ळ +1 +ो +च +ौ +स + +त +ऐ +ब +5 +ई +ध +ल +छ +3 +ै +़ +ा +ु +- +ट +फ +4 +घ +ग +थ +द +े +र +ि diff --git a/models/bgr/G_100000.pth b/models/bgr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..16f11d5c3d22a7032714aad72697d3d32cc73414 --- /dev/null +++ b/models/bgr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb7202c842f07f5976430a1ab23affdca680fe7e81d93b379e9d7097ab42551 +size 145483010 diff --git a/models/bgr/config.json b/models/bgr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bgr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bgr/vocab.txt b/models/bgr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e8d0b9b91f0291a3455658feb7b2c4b85801242d --- /dev/null +++ b/models/bgr/vocab.txt @@ -0,0 +1,36 @@ +| +a +h +n +i +u +l +t +k +e +m +g +s +w +â +c +r +p +d +o +z +ê +b +f +v +û +ṭ +j +î +' +- +à +1 +y +q + diff --git a/models/bgt/G_100000.pth b/models/bgt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d28060ce6b9b7e46d7a12868a754a61419c799b --- /dev/null +++ b/models/bgt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4abbd5e62a047295db4e135483155c811fde414d010876ebee71004f1279d3 +size 145474533 diff --git a/models/bgt/config.json b/models/bgt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bgt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bgt/vocab.txt b/models/bgt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5fc74e5afe94aa7d695e352e53928d3c208dedf9 --- /dev/null +++ b/models/bgt/vocab.txt @@ -0,0 +1,25 @@ + +' +o +a +p +m +e +w +u +i +j +c +k +n +r +g +s +h +l +t +b +_ +v +f +d diff --git a/models/bgw/G_100000.pth b/models/bgw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6285908e4c3c1f94de6828c6789565f141a4bd93 --- /dev/null +++ b/models/bgw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c64360d65b27cc3919ee24887071363700bbba7cb9e3dba2cbc70d18347c887e +size 145487621 diff --git a/models/bgw/config.json b/models/bgw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bgw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bgw/vocab.txt b/models/bgw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2810d19cecbc5ebba9174775a2b2df6f25265ce7 --- /dev/null +++ b/models/bgw/vocab.txt @@ -0,0 +1,42 @@ +ल +ख +इ +ा +र +स +ह +ि +ई +ी +ब +त +य +ं + +े +म +च +ऊ +ए +ू +ड +ग +6 +ु +उ +भ +क +ओ +‍ +' +ट +़ +अ +द +व +न +ो +आ +ज +प +_ diff --git a/models/bha/G_100000.pth b/models/bha/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5f9ff49bbdcd23d1e4dc09d538194a5b882383b0 --- /dev/null +++ b/models/bha/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1ae6c84da628a59221ee40286fd0d410c712aeae6629efc967d9bcedf0d7b1 +size 145491459 diff --git a/models/bha/config.json b/models/bha/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bha/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bha/vocab.txt b/models/bha/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b71e778811f0f5dfae765c09bae4f1d4d249c91 --- /dev/null +++ b/models/bha/vocab.txt @@ -0,0 +1,47 @@ +फ +ठ +म +ु +न +इ +स +त +ल +ं +ग +ट +ए +् +' +घ +थ +_ +ख +व +ढ +ो +ड +य +र +छ +अ +द +भ +झ +ू +च +े +ई +प +ह +ज +आ +़ +क +- +ा +ऊ + +ी +ध +ब diff --git a/models/bht/G_100000.pth b/models/bht/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4f3d118b59f9f34dee619c6393af9323e522e59 --- /dev/null +++ b/models/bht/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef95700e10b3dbcde8dae1d9946d8435cd82dd570aa8ff2795c00eb943303cf +size 145506785 diff --git a/models/bht/config.json b/models/bht/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bht/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bht/vocab.txt b/models/bht/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a75cf5fd3ee6ce12fab4335850ab8c39c6222287 --- /dev/null +++ b/models/bht/vocab.txt @@ -0,0 +1,67 @@ +| +ा +े +र +क +ी +द +स +न +ं +ि +ो +य +ल +त +ज +् +ह +म +प +ु +ब +ण +ै +च +श +ग +ई +उ +ड +़ +अ +ू +व +भ +इ +ख +थ +ऐ +आ +फ +छ +ध +झ +ट +ठ +ओ +घ +ौ +ष +- +ए +ढ +ँ +ऊ +ञ +ृ +औ +‍ +ॉ +0 +' +4 +3 +5 +2 + diff --git a/models/bhz/G_100000.pth b/models/bhz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..87b9ef7b8309712d9cf9a3d5fdd27abe13afa11c --- /dev/null +++ b/models/bhz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fae96aa6d29f6cde05d5988d6108c160521b9cfeed6669cdd41df9c8a44f98d +size 145475329 diff --git a/models/bhz/config.json b/models/bhz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bhz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bhz/vocab.txt b/models/bhz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..72a7b64e177e2c4c8cd3a764abe10baa1518f342 --- /dev/null +++ b/models/bhz/vocab.txt @@ -0,0 +1,26 @@ +e +b +t +— +h +ꞌ +l +n +r +_ +k + +d +g +w +u +p +' +o +- +s +a +m +á +ó +i diff --git a/models/bib/G_100000.pth b/models/bib/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fae98f9e58a5c46a1801c6ab96febf202745851 --- /dev/null +++ b/models/bib/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897c373b833a7c353d45bb099fb313551abead66016efd0b94b49c4ad3e3ecdb +size 145481442 diff --git a/models/bib/config.json b/models/bib/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bib/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bib/vocab.txt b/models/bib/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4180aaa7512b5585eebd4aec84ab77c3f61921fc --- /dev/null +++ b/models/bib/vocab.txt @@ -0,0 +1,34 @@ +| +a +ɔ +ɩ +n +m +b +ʋ +k +r +y +ɛ +s +o +w +i +' +ŋ +d +t +e +h +z +g +l +u +ə +c +p +j +f +v +- + diff --git a/models/bim/G_100000.pth b/models/bim/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e87769a050be4f2c7c68019fe6c20b575258053a --- /dev/null +++ b/models/bim/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50f713601fbe078352ce7749ef8df1a92ac832aae61dda48d40664709dea3045 +size 145479143 diff --git a/models/bim/config.json b/models/bim/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bim/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bim/vocab.txt b/models/bim/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a3939e652c929929b04139314a0ebe4aeca8603b --- /dev/null +++ b/models/bim/vocab.txt @@ -0,0 +1,31 @@ +| +a +n +i +k +e +b +t +u +o +m +ɔ +r +s +y +ŋ +p +l +d +j +w +g +f +h +- +c +' +ƒ +q +ê + diff --git a/models/bis/G_100000.pth b/models/bis/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c293b36b4e595661f25f651141d7d4d312589f68 --- /dev/null +++ b/models/bis/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8022e73bea779b5886355f6cd3f89f48a76a045e69c4754c4d3c846334a24af8 +size 145476103 diff --git a/models/bis/config.json b/models/bis/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bis/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bis/vocab.txt b/models/bis/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f2eaf7e34684e03e949cbb253d86620fd96c4077 --- /dev/null +++ b/models/bis/vocab.txt @@ -0,0 +1,27 @@ +| +a +o +e +l +m +n +i +g +t +s +b +u +f +k +y +w +p +r +h +d +v +j +- +' +6 + diff --git a/models/biv/G_100000.pth b/models/biv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c74e3fa0c45c327c9543d3f4589dc09a29aa7288 --- /dev/null +++ b/models/biv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325e379399626063804962052844926ae1b89e1283ce925f1ff572f4ce129acb +size 145484629 diff --git a/models/biv/config.json b/models/biv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/biv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/biv/vocab.txt b/models/biv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..be48e48fde4fc50664e3cf867bae368dc01fd127 --- /dev/null +++ b/models/biv/vocab.txt @@ -0,0 +1,38 @@ +| +a +n +ɩ +ɛ +ɔ +b +r +ʋ +y +l +̃ +s +k +t +e +m +w +i +p +o +u +ã +' +j +d +f +h +c +ŋ +g +ũ +v +õ +ẽ +ĩ +- + diff --git a/models/bjr/G_100000.pth b/models/bjr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..be8a1a1f767d7ed492830107f80b9d8832b5120c --- /dev/null +++ b/models/bjr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d33ed05c5ca01b4f0d6a62b25db782c00d86b5a759d439bb6cfcd15a206af5 +size 145485277 diff --git a/models/bjr/config.json b/models/bjr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bjr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bjr/vocab.txt b/models/bjr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..74aa451b89c328653101d17a1745ff580a8910ad --- /dev/null +++ b/models/bjr/vocab.txt @@ -0,0 +1,39 @@ +a +| +i +n +q +m +o +r +u +e +f +s +k +d +t +í +á +y +p +b +é +ú +ó +h +g +- +0 +̱ +2 +w +1 +4 +5 +' +6 +9 +7 +ṉ + diff --git a/models/bjv/G_100000.pth b/models/bjv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f2a2090c37aa5465ea9d71f938a7f108768eed0 --- /dev/null +++ b/models/bjv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e221ecc2b1e2189f0510d292146c675c1c20a9f7d241597589a76bd7ae17b8a +size 145479153 diff --git a/models/bjv/config.json b/models/bjv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bjv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bjv/vocab.txt b/models/bjv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7e30025f043584c3906f4ba7d1271126570b6981 --- /dev/null +++ b/models/bjv/vocab.txt @@ -0,0 +1,31 @@ +| +ɨ +a +n +ə +k +e +t +d +j +l +m +o +g +i +r +ɔ +s +- +u +w +b +y +ɓ +ḛ +̰ +p +é +ḭ +h + diff --git a/models/bjw/G_100000.pth b/models/bjw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c1a358bb4f05a5718dea6a8c815ea5d0ac6c2e6 --- /dev/null +++ b/models/bjw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b9f9d2343ca40995fd3afe90d9ff99a0b699e01acd3f098a85db4c182c7a58d +size 145482239 diff --git a/models/bjw/config.json b/models/bjw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bjw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bjw/vocab.txt b/models/bjw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77b1330814faf54f5777d91f3372d70c3c1e7efd --- /dev/null +++ b/models/bjw/vocab.txt @@ -0,0 +1,35 @@ +w +ö +c +ü +d +ä +ë +f +e +m +l +u +ɔ +o +ɩ +s +g +t +b +y +r +‐ +ŋ +ɛ +' +i +ʋ +p +a +_ +n +j +k +v + diff --git a/models/bjz/G_100000.pth b/models/bjz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e48b06c4f7fb44f370755a5435548ed75abe58d --- /dev/null +++ b/models/bjz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1eeebf76a61607b1607be7daf6a7258e57ff316f85170dcbdc3039fdff9ae4c +size 145491429 diff --git a/models/bjz/config.json b/models/bjz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bjz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bjz/vocab.txt b/models/bjz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ebfe35da0a792a7384b9f513ea2629e16f0449a4 --- /dev/null +++ b/models/bjz/vocab.txt @@ -0,0 +1,47 @@ +9 +o +ú +ã +k + +ũ +j +h +- +_ +b +ó +1 +i +0 +y +u +é +g +' +v +e +t +í +f +c +2 +n +l +s +4 +5 +r +ĩ +m +3 +q +d +7 +w +8 +á +6 +a +p +ẽ diff --git a/models/bkd/G_100000.pth b/models/bkd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d30b8088fb59792be29f4f132f118619e1968fc5 --- /dev/null +++ b/models/bkd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56119ffa12e775a697c8d2ba9769ffd5d1ce17a81b99b3d43c20f4e52a849bed +size 145487595 diff --git a/models/bkd/config.json b/models/bkd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bkd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bkd/vocab.txt b/models/bkd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..65aa3e320ff5c737170a54700233b989f3bd78a2 --- /dev/null +++ b/models/bkd/vocab.txt @@ -0,0 +1,42 @@ +w +- +c +3 +k +l +v + +p +7 +4 +r +' +x +0 +2 +5 +z +t +à +m +ù +j +n +a +_ +u +s +y +b +9 +o +1 +g +h +e +d +ì +i +f +6 +è diff --git a/models/bkv/G_100000.pth b/models/bkv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..570b4aa923c2762b386ea93ef2f0c594d3122169 --- /dev/null +++ b/models/bkv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcddc3256f314b509f6c158981258186095e6591c7253c8ab7939e41547f5bbd +size 145496787 diff --git a/models/bkv/config.json b/models/bkv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bkv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bkv/vocab.txt b/models/bkv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4b466542a67eed699f2e0136216df9f5527377a1 --- /dev/null +++ b/models/bkv/vocab.txt @@ -0,0 +1,54 @@ +| +a +i +n +e +g +u +' +k +h +b +m +y +o +r +t +c +s +w +d +p +j +f +è +- +ì +l +é +á +ú +í +ē +ě +ī +ū +̀ +à +ā +ò +ù +ǹ +ó +ń +ō +̄ +î +û +0 +ḿ +â +ǎ +7 +8 + diff --git a/models/blh/G_100000.pth b/models/blh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6aa7178cbed38fbcdde6b06ff56dccddeaed3a76 --- /dev/null +++ b/models/blh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:863476425345fdf11fb046711d5064508d34eb9fa66546c26cf0dfdc98e44329 +size 145489797 diff --git a/models/blh/config.json b/models/blh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/blh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/blh/vocab.txt b/models/blh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2a9bfd65a14d9e31a7e2d16fa8d3e7d0b5840454 --- /dev/null +++ b/models/blh/vocab.txt @@ -0,0 +1,45 @@ +ù +w +e +ó +_ +ě +p +y +è +a +i +ǔ +̀ +ǎ +á +ŋ +b +ɤ +s +k +u +í +ɔ +j +m +l +ò +ì +f +t +n +ú +g +é +o +ǐ +1 +̌ + +ŏ +d +ɛ +à +v +́ diff --git a/models/blt/G_100000.pth b/models/blt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..598349e14a76164263d068a05680476d2e0a1d95 --- /dev/null +++ b/models/blt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e96ba86bbb7ac24f26e89de78d480a9084155f0e7163fd64ae8fbcf3e9e773c +size 145509201 diff --git a/models/blt/config.json b/models/blt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/blt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/blt/vocab.txt b/models/blt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..29a65dbe5558b1f1ac11098bfa9c107bc0bf018f --- /dev/null +++ b/models/blt/vocab.txt @@ -0,0 +1,70 @@ +ụ +m +e +ệ +ộ +ẹ +n +v +c +ă +ỉ +ê +ô +ở + +k +i +ứ +a +_ +s +q +0 +h +ú +9 +ọ +‐ +ắ +o +ể +2 +5 +l +u +ơ +ế +ỏ +t +ả +á +d +í +ẳ +é +ợ +7 +ẻ +p +ố +b +ạ +g +1 +' +3 +ử +ặ +ị +ư +ổ +y +8 +ớ +ự +ó +đ +ủ +4 +6 diff --git a/models/blx/G_100000.pth b/models/blx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f80e9c33325e8dc048b06c8c6dec5c84db3e807d --- /dev/null +++ b/models/blx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e189f2a2ccca9745d047d585ae098fbeceb0ebe622c4a23532b3b784181eed97 +size 145479151 diff --git a/models/blx/config.json b/models/blx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/blx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/blx/vocab.txt b/models/blx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..11691f8f7049896c24b9f0d0f2aedac7a28f3a15 --- /dev/null +++ b/models/blx/vocab.txt @@ -0,0 +1,31 @@ +o +d +k +w +f +a +n +_ +g +' +q + +u +t +ê +- +l +j +s +c +h +p +e +y +r +v +i +x +m +b +z diff --git a/models/blz/G_100000.pth b/models/blz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..89b544d313c52ab5d8dc93726e0ffeef6aa137f3 --- /dev/null +++ b/models/blz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da54d6a54c9a1b25c4dfa4ac908fdef83d0dd6c77d44323216c4272ec4daa49d +size 145476865 diff --git a/models/blz/config.json b/models/blz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/blz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/blz/vocab.txt b/models/blz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d0e7f36d75f90c0e15038fed63d2f751745e3b90 --- /dev/null +++ b/models/blz/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +i +o +u +m +' +k +s +e +t +l +g +r +p +b +y +d +w +- +h +f +j +z +c +— + diff --git a/models/bmq/G_100000.pth b/models/bmq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..871f6a0cefcbced7af2b44ac5c97c77353bbf4b5 --- /dev/null +++ b/models/bmq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccf5f91f4a1d0d6d4f1c8796386df2735116aac512066f84c9336dca0518c548 +size 145492321 diff --git a/models/bmq/config.json b/models/bmq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bmq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bmq/vocab.txt b/models/bmq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3bf28f6ded495595ec53ba600188b01935457f32 --- /dev/null +++ b/models/bmq/vocab.txt @@ -0,0 +1,48 @@ +ɲ +h + +s +d +ɛ +v +l +ā +w +í +̀ +c +ū +ò +u +ē +e +i +m +à +y +ɓ +ú +è +a +– +f +ì +t +g +b +ō +o +n +_ +z +̄ +k +ù +r +p +' +á +é +- +ī +ó diff --git a/models/bmr/G_100000.pth b/models/bmr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc6221747af2b6d2a27690939a0ea52bce63ad66 --- /dev/null +++ b/models/bmr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8a2b1d37993250ddda36cca2227940b835387c533050bf0fae0f01197a49c3 +size 145478377 diff --git a/models/bmr/config.json b/models/bmr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bmr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bmr/vocab.txt b/models/bmr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..49ff608f37cbad87fb7e9111d235cc3cf4570f40 --- /dev/null +++ b/models/bmr/vocab.txt @@ -0,0 +1,30 @@ +k +q +y +c +f +x +u +t +i +ɨ +g +h +ñ +b +o +j +r +l +z +- +v +_ +d +a +e +s +p +m +n + diff --git a/models/bmu/G_100000.pth b/models/bmu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0dd747763bccb422c65b20964d5e6e311e8c2e6 --- /dev/null +++ b/models/bmu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0154bb95f657dac534a7cf6082ba5c8589911fcf71c1768e277ce287e25235 +size 145485273 diff --git a/models/bmu/config.json b/models/bmu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bmu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bmu/vocab.txt b/models/bmu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b84af31f584e42635f212982bc44d11781a8d38a --- /dev/null +++ b/models/bmu/vocab.txt @@ -0,0 +1,39 @@ +b +j +8 +z +l +a +e +u +ŋ +7 +i +k +n +3 +2 +4 +o +s +_ +g +q +' +1 +6 +5 +y +f + +9 +d +r +m +w +h +- +0 +ö +t +p diff --git a/models/bmv/G_100000.pth b/models/bmv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6008d1938980b0aed2b2ec4aadbf524f5596eb2 --- /dev/null +++ b/models/bmv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf74831a35b8b4f913593de00d8d1983d4fb06e86d4d8eb64ab0ae2225ebbe62 +size 145486067 diff --git a/models/bmv/config.json b/models/bmv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bmv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bmv/vocab.txt b/models/bmv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d29d7ee6e496a401e1f6002136c60fe54477f3a --- /dev/null +++ b/models/bmv/vocab.txt @@ -0,0 +1,40 @@ +t +ê +h +g +i +à +ò +f +m +è +a +k +ù +ì +ô +̀ + +u +j +ə +w +e +ɨ +b +- +s +̂ +o +y +c +_ +l +û +ŋ +n +d +' +p +â +î diff --git a/models/bng/G_100000.pth b/models/bng/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..801056c623bc7eb8ca5c45506cb48c7c0a9fe4fd --- /dev/null +++ b/models/bng/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91336ca86e6bb2484125692439b5b3508b0ef1abd30c460f51efe40582432cf0 +size 145503735 diff --git a/models/bng/config.json b/models/bng/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bng/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bng/vocab.txt b/models/bng/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1de185348f6c847441888b0bd72d299d9297644d --- /dev/null +++ b/models/bng/vocab.txt @@ -0,0 +1,63 @@ +| +ে +া +র +্ +ন +ত +ি +ক +য +ব +ম +দ +ল +স +প +় +ু +আ +ো +শ +ছ +জ +হ +এ +ই +ী +গ +খ +থ +ও +ষ +ধ +ভ +চ +অ +ঁ +ং +ট +ণ +ড +উ +ূ +ঈ +ৃ +ঙ +ফ +ঠ +ঘ +ৌ +ঞ +ৎ +ৈ +ঝ +ঃ +ঐ +ঢ +- +ঋ +' +p +ঊ + diff --git a/models/bno/G_100000.pth b/models/bno/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3dce0a506d5598a667514965922e8b12d4387ee3 --- /dev/null +++ b/models/bno/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b3f9b6ebfd9ca46c09136c3c21cc8d9938ceedfba1796914140f1b32d0337b0 +size 145480709 diff --git a/models/bno/config.json b/models/bno/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bno/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bno/vocab.txt b/models/bno/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..63cfb0028384f6c5d188fef4985dcaaa06440fbd --- /dev/null +++ b/models/bno/vocab.txt @@ -0,0 +1,33 @@ +c +b +q +é +z +à +i +_ +g +t +a +' +m +h +k +s + +- +y +á +v +w +o +p +d +u +l +r +x +e +f +j +n diff --git a/models/bnp/G_100000.pth b/models/bnp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1828b692b3d2b082546c692aff94d0424af4eb3e --- /dev/null +++ b/models/bnp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1cefbb215e33369271e1a870477cf1e10cf8d0cfb0ef19849079545172811f9 +size 145481431 diff --git a/models/bnp/config.json b/models/bnp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bnp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bnp/vocab.txt b/models/bnp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7085dffd3a7012ed44902564d718a49924e33eee --- /dev/null +++ b/models/bnp/vocab.txt @@ -0,0 +1,34 @@ +­ +3 +t +a +6 +u +8 +l +p +o +d +s +1 +r +n +- +_ +i +m +e +h +2 +5 +b +v +7 +4 + +k +g +0 +' +9 +j diff --git a/models/boa/G_100000.pth b/models/boa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bded6eac1575215ebdf765adf32a1daaee479099 --- /dev/null +++ b/models/boa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f93746a7a5e58fd4ed5da9c7d0785b3f9a71bd8241008df4d871c5023ba5ead +size 145490001 diff --git a/models/boa/config.json b/models/boa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/boa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/boa/vocab.txt b/models/boa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b28ddfee64e2f96bc209538af5aa70a51b7617ec --- /dev/null +++ b/models/boa/vocab.txt @@ -0,0 +1,45 @@ +| +á +e +é +a +í +m +t +j +y +ú +h +u +i +n +c +d +b +ɨ +v +s +r +ó +o +l +́ +p +k +w +ñ +— +g +- +0 +1 +7 +4 +2 +3 +5 +6 +8 +' +9 + diff --git a/models/bod/G_100000.pth b/models/bod/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..01334829a7e00efe7aef7a7ac072b6140c6ea610 --- /dev/null +++ b/models/bod/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fae2a471bcea5a05a2492cdc39de2c4e55996206a739125d0265fddcc58c585b +size 145501443 diff --git a/models/bod/config.json b/models/bod/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bod/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bod/vocab.txt b/models/bod/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8ffb1c4a3a0444133e856de378994ceb8e625d54 --- /dev/null +++ b/models/bod/vocab.txt @@ -0,0 +1,60 @@ +་ +ས +ི +ད +ག +ོ +ན +ང +བ +མ +ར +ུ +ེ +ྱ +འ +ལ +པ +ཁ +ཡ +ཀ +ྲ +ཞ +ཅ +ཕ +ྣ +ཆ +ྟ +ཐ +ཤ +ྐ +ཏ +ཉ +ྒ +ཚ +ཟ +ཱ +ླ +ྡ +ཙ +ྨ +ྤ +ཇ +ྗ +ཛ +ྔ +ཨ +ྩ +ཧ +ྙ +ྷ +ྦ +ྫ +ྕ +ྭ +ཝ +ཻ +ཌ +ཊ +ཪ + diff --git a/models/boj/G_100000.pth b/models/boj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bcf14cf2dd0ef0c95179b4e7fff29cc551c4d4ab --- /dev/null +++ b/models/boj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e724899c92def9b3cb37ab37f2a68136348774bf7066612acc166e94b5244b53 +size 145484553 diff --git a/models/boj/config.json b/models/boj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/boj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/boj/vocab.txt b/models/boj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7e6bca46b227b370995a916c1ed3fd574a8ec029 --- /dev/null +++ b/models/boj/vocab.txt @@ -0,0 +1,38 @@ +ñ +f +8 +9 +6 +j +1 + +b +' +y +3 +_ +v +r +u +d +n +0 +o +i +g +4 +t +h +w +s +7 +m +p +q +ŋ +2 +e +k +l +5 +a diff --git a/models/bom/G_100000.pth b/models/bom/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a95bd2f2a3eafd38f24a4e70b4189c1c2704a122 --- /dev/null +++ b/models/bom/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:383875e23321cce0362dabfc3b19163cddd2fde3d29b88c9d118e5693c4c1922 +size 145502220 diff --git a/models/bom/config.json b/models/bom/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bom/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bom/vocab.txt b/models/bom/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..166c6640f207f29e7dfbd329a86192c2ae78807d --- /dev/null +++ b/models/bom/vocab.txt @@ -0,0 +1,61 @@ +ō +ǒ +h +g +a +ó +k +ɛ +ǔ +ú +ò +y +é +́ +í +ê +à +i +- +— + +w +d +z +s +l +p +è +u +r +v +n +ɔ +â +á +_ +' +j +ù +c +ū +̌ +̂ +̄ +o +û +m +e +f +ī +î +t +ì +ǎ +̀ +ě +b +ā +ǐ +ô +ē diff --git a/models/bor/G_100000.pth b/models/bor/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..38f5b1b107456ca3af6a09f3ae79c609a5341eaa --- /dev/null +++ b/models/bor/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8673b6c3b7cedf679f71567cb1a86a3ca52f0fc927ead87818ffc9277706f12 +size 145492981 diff --git a/models/bor/config.json b/models/bor/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bor/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bor/vocab.txt b/models/bor/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2d91653c7af58c8389dcd01645761354e74cce6e --- /dev/null +++ b/models/bor/vocab.txt @@ -0,0 +1,49 @@ +0 +a +č +é +w +g +ê +b +o +d +q +m +v +ç +s +k +e +l +4 +6 +h +ã +x +r +á +f +n +_ +3 +2 + +i +ó +í +ü +ô +– +5 +j +' +7 +z +1 +u +c +p +â +ú +t diff --git a/models/bov/G_100000.pth b/models/bov/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..34369e8db22bcf190c90a8b9083e7211545bcdd2 --- /dev/null +++ b/models/bov/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a07b1ca771150837f39ff40f95d0ef3dee8de5c7a6f78412ae2dde2b10c66b79 +size 145483883 diff --git a/models/bov/config.json b/models/bov/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bov/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bov/vocab.txt b/models/bov/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5340f112a261152095f150ef3d810319b14fb96b --- /dev/null +++ b/models/bov/vocab.txt @@ -0,0 +1,37 @@ +s +' +o +v +1 +̃ +_ +r +f +- +p +z +k +g +a +ã +u +h +t +́ +l +b +ó +m +y +í +w + +e +i +á +d +ɛ +ɔ +n +ũ +ĩ diff --git a/models/box/G_100000.pth b/models/box/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac99c723048285c2d29ffe9db07c8c009032ea6e --- /dev/null +++ b/models/box/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c92773e4881b025da77acc1096429e1d87504d874a74f7ca900f6e144407fd2d +size 145492977 diff --git a/models/box/config.json b/models/box/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/box/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/box/vocab.txt b/models/box/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9163f9884861fd5bccd303802a6b7374c69e1e04 --- /dev/null +++ b/models/box/vocab.txt @@ -0,0 +1,49 @@ +d +ṹ + +b +̃ +́ +s +ń +ù +h +y +ò +ɔ +_ +o +a +ó +é +6 +è +ǹ +l +í +- +ã +m +e +ɲ +k +ɓ +n +c +t +r +f +p +ĩ +w +ì +v +u +ú +̀ +z +á +ɛ +ũ +i +à diff --git a/models/bpr/G_100000.pth b/models/bpr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..130b5fb5b29944d347732d658c63ef0cc2b1fe49 --- /dev/null +++ b/models/bpr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4868c63e258e079e04bb3dd5ee5b1142c4bae776af1370e66dd0f574f07d0e82 +size 145485947 diff --git a/models/bpr/config.json b/models/bpr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bpr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bpr/vocab.txt b/models/bpr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbd08f831cf751130e01857784e90d559b79ea05 --- /dev/null +++ b/models/bpr/vocab.txt @@ -0,0 +1,40 @@ +| +a +n +d +i +u +l +t +m +g +e +k +o +s +y +f +b +é +w +à +è +h +ò +r +ù +ê +p +- +ì +0 +1 +2 +' +4 +3 +7 +8 +c +5 + diff --git a/models/bps/G_100000.pth b/models/bps/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..05ac3aadefa1811c9c3337ca592ce449e2954117 --- /dev/null +++ b/models/bps/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:611aa374855da73ad3ac68229b3f371f2ce97f20461270b57e339e6ccf825ec9 +size 145478498 diff --git a/models/bps/config.json b/models/bps/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bps/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bps/vocab.txt b/models/bps/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..28ea8b7baa52bf1ab482b31297bb396e03538472 --- /dev/null +++ b/models/bps/vocab.txt @@ -0,0 +1,30 @@ +| +a +n +d +i +u +l +t +g +m +e +k +s +f +o +y +b +w +à +é +è +ò +h +r +ù +ê +' +ì +p + diff --git a/models/bqc/G_100000.pth b/models/bqc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..226bbdac63fc503da43f566b8d66055e893c4aa9 --- /dev/null +++ b/models/bqc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b836ce98b8ab7c56060ae5d7be01d14d33032b267d453ffab33ecdd3e5ea5568 +size 145491465 diff --git a/models/bqc/config.json b/models/bqc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bqc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bqc/vocab.txt b/models/bqc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fb134ed1622352bb5a6afb5eca72a6f0b5930fbf --- /dev/null +++ b/models/bqc/vocab.txt @@ -0,0 +1,47 @@ +| +a +ɛ +ↄ +i +́ +l +k +u +b +̃ +s +n +p +g +m +à +e +w +o +d +ã +á +y +̀ +z +ń +ĩ +t +ũ +ì +í +ò +f +v +ó +ɔ +' +ú +è +ù +ṹ +é +ǹ +h +ḿ + diff --git a/models/bqi/G_100000.pth b/models/bqi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c16c97114008ad53b33b26f411a113a69bd3ddc4 --- /dev/null +++ b/models/bqi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e07e8855c3d40b244833b8f1e9e70d2f580921f6552b7e3798c45dea6b03fb50 +size 145488367 diff --git a/models/bqi/config.json b/models/bqi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bqi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bqi/vocab.txt b/models/bqi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..330485b4ba11932769f08ca7c1d7413dc650bb47 --- /dev/null +++ b/models/bqi/vocab.txt @@ -0,0 +1,43 @@ +| +ی +ا +و +ه +ن +م +ر +س +ک +ب +ݚ +ت +ٚ +د +خ +ز +ل +ش +گ +پ +ع +ح +ۨ +ج +ق +چ +ف +آ +ط +غ +ص +ٛ +ذ +ض +ث +ظ +ئ +ژ +ء +ۆ +ێ + diff --git a/models/bqj/G_100000.pth b/models/bqj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..22c32cd3ca67d4d528ea3ddaecddb25e148586f3 --- /dev/null +++ b/models/bqj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a0116ec25d24a025228807eec9561789e57e7203c8db20d2e16c75bad747907 +size 145483767 diff --git a/models/bqj/config.json b/models/bqj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bqj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bqj/vocab.txt b/models/bqj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e965cace7b47962609bd60f2d43304a21d89fbef --- /dev/null +++ b/models/bqj/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +u +e +i +m +o +b +g +l +t +r +j +s +f +y +h +á +w +ᵽ +ú +k +ŋ +' +ñ +é +ĉ +í +ó +d +v +c +p +- +– + diff --git a/models/bqp/G_100000.pth b/models/bqp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8ea722de0b0c944455704fb241906f3dcbe0472 --- /dev/null +++ b/models/bqp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadad05e393ee9bf2f00d2a55501f06af43756daf7e79456c638108f6a970bae +size 145499893 diff --git a/models/bqp/config.json b/models/bqp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bqp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bqp/vocab.txt b/models/bqp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d425fdaee043519de24e6eb37baedf582484b71c --- /dev/null +++ b/models/bqp/vocab.txt @@ -0,0 +1,58 @@ +m +k +ɛ +á +6 +2 +5 +h +o +' +z +9 +t +p +3 +à +s +é +́ +b +ũ +8 +ń +a +v +e +ɔ +̀ +r +i +ó +y +ĩ +ḿ +- +f +ú +_ +̃ +è +0 +ã +ǹ +n +ù +ò + +d +7 +g +í +u +4 +l +ↄ +ì +1 +w diff --git a/models/bru/G_100000.pth b/models/bru/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ba9d5bfd2e5aab25aded337ce4eace1d7927a21 --- /dev/null +++ b/models/bru/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:014a08c57901eb629e3c9505f95d536a607e50b2c634b7814931564c6306567a +size 145492213 diff --git a/models/bru/config.json b/models/bru/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bru/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bru/vocab.txt b/models/bru/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..61ddbe9be1519190d2e785eb81f291717c2de130 --- /dev/null +++ b/models/bru/vocab.txt @@ -0,0 +1,48 @@ +| +a +n +i +c +t +h +q +g +m +s +o +u +á +r +l +p +y +â +ớ +ỡ +k +ễ +ứ +b +ũ +ĩ +ữ +ỗ +ê +‐ +d +e +ơ +í +ṓ +- +ố +ó +ô +v +ế +ō +é +ư +ú +' + diff --git a/models/bsc/G_100000.pth b/models/bsc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c2a44ec37d54dc92c22876675b45d6a382c95c18 --- /dev/null +++ b/models/bsc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9919c10d01df441209e2d3f2c7ccdcb5b37a243b63e4f170beb10e37df4461fd +size 145482243 diff --git a/models/bsc/config.json b/models/bsc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bsc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bsc/vocab.txt b/models/bsc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b7ba97d6079d863f2f69ddf2e6eb497ec36cda62 --- /dev/null +++ b/models/bsc/vocab.txt @@ -0,0 +1,35 @@ +ñ +l +̃ +ɗ +ŋ +ë +- +w +ɓ +e +a +c +o + +ƴ +f +p +u +ỹ +h +m +t +x +k +b +g +y +j +d +_ +r +n +i +ŝ +s diff --git a/models/bsq/G_100000.pth b/models/bsq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8dccb13dc76375c05a3e2ef6044c86d0d5b2bb4 --- /dev/null +++ b/models/bsq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e121f01de96e3b7f6893d4f505eadcf0cc3c8260607629ae5af86d08f7146cbe +size 145501448 diff --git a/models/bsq/config.json b/models/bsq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bsq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bsq/vocab.txt b/models/bsq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6ccafc2bc99757b167ae6a8413b4a8c55ddb75e6 --- /dev/null +++ b/models/bsq/vocab.txt @@ -0,0 +1,60 @@ +| +ɔ +ɛ +n +̀ +ɖ +k +ɓ +m +y +é +a +à +́ +ì +w +í +- +d +è +ě +p +á +g +u +̃ +i +s +ú +ò +b +o +ù +e +ó +j +z +t +h +c +ǎ +ḿ +ǔ +ǐ +ĩ +̌ +ǒ +f +x +ã +ń +v +ũ +̄ +ū +ṹ +ǹ +ē +' + diff --git a/models/bss/G_100000.pth b/models/bss/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3370c45a2fc45d43a33e67b10c50da5978771a5 --- /dev/null +++ b/models/bss/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b27d96654cbf7cc2d685193892739fd77624427931080b91c42cfb079093a8a +size 145500669 diff --git a/models/bss/config.json b/models/bss/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bss/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bss/vocab.txt b/models/bss/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..db47ca0c1dd6dd9a32e4dc7463e36da14a56098c --- /dev/null +++ b/models/bss/vocab.txt @@ -0,0 +1,59 @@ +| +n +b +é +ɛ +e +m +á +d +a +́ +y +l +ɔ +s +k +t +g +ə +h +w +ŋ +o +ʼ +̄ +p +í +- +c +ó +i +ē +ê +u +ǒ +â +ḿ +ě +ú +̂ +̌ +r +z +ń +ǎ +ā +ǐ +ī +ō +ô +j +î +ū +û +f +ǔ +' +v + diff --git a/models/btd/G_100000.pth b/models/btd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2f6c9709aeb8893d9cc41b12b0e6d8f18c544f1 --- /dev/null +++ b/models/btd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5c934cbbd906bc63b6a2e88cd171cdcfc6d8361f070b595de08a2750c1f360 +size 145484545 diff --git a/models/btd/config.json b/models/btd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/btd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/btd/vocab.txt b/models/btd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8ef305dec41e70fac732feb0865708c1449d298a --- /dev/null +++ b/models/btd/vocab.txt @@ -0,0 +1,38 @@ +d +- +m +8 +à +w +b +l +c +h +r +q + +s +9 +o +y +2 +e +g +j +a +_ +t +f +è +3 +5 +z +i +0 +ì +p +k +n +u +1 +' diff --git a/models/bts/G_100000.pth b/models/bts/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b54276bf8d3a9c6a19cf9653a1b06875069fd208 --- /dev/null +++ b/models/bts/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b178d4c0d2f0eec330d53a1662cae6be418eb8a5e265d8972424dab7803b1436 +size 145476979 diff --git a/models/bts/config.json b/models/bts/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bts/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bts/vocab.txt b/models/bts/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bf963ca99373328d6ed17ba76b3379b621e15b5b --- /dev/null +++ b/models/bts/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +i +o +h +m +s +u +g +r +t +b +d +p +e +l +k +j +- +y +w +f +v +z +c +x + diff --git a/models/btt/G_100000.pth b/models/btt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c143b0a9fed9fe2f23c1ceb70875b201c26499db --- /dev/null +++ b/models/btt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4bcf2cd9fb7c3c7bd7ed3ea0e90ea1cbbfad2f4668e349d05baa805e0e16cb7 +size 145495271 diff --git a/models/btt/config.json b/models/btt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/btt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/btt/vocab.txt b/models/btt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..51286667af9f93f4705959bda41e8bd13366cd6f --- /dev/null +++ b/models/btt/vocab.txt @@ -0,0 +1,52 @@ +î +ǎ +i +k + +ó +ē +s +p +a +ū +í +g +û +w +á +- +ě +ê +ù +t +n +j +é +_ +l +è +f +ā +r +m +â +ǒ +z +d +y +ī +' +c +ǔ +b +ò +ǐ +ı +u +o +ì +à +ú +ō +h +e diff --git a/models/btx/G_100000.pth b/models/btx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6385d0d9173de0e27f7da9690feea66235071b0e --- /dev/null +++ b/models/btx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58aadd4d3dd2774099996d386260ed0691a27d5808f3f440d21c1e4ae5b7f92f +size 145482213 diff --git a/models/btx/config.json b/models/btx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/btx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/btx/vocab.txt b/models/btx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6ccfb9fa9eca42cf3793e0e49df4a3ccbe0f3a6e --- /dev/null +++ b/models/btx/vocab.txt @@ -0,0 +1,35 @@ +2 +- +h +1 +t +b +p +u +r +8 +d +4 +j +0 +7 +e +3 +6 +a +' +n +i +w +k +z +y +m +_ +g +c +o + +5 +l +s diff --git a/models/bud/G_100000.pth b/models/bud/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fbf5301bb289f5a63009cc4d0059223c47e81d97 --- /dev/null +++ b/models/bud/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c27c79991ddf593133361df6d673c02af0110e77bcf8eca60a735b3ab62b39c2 +size 145494523 diff --git a/models/bud/config.json b/models/bud/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bud/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bud/vocab.txt b/models/bud/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..522dc7b45ef22935e8de48c217cddc09d73baaa8 --- /dev/null +++ b/models/bud/vocab.txt @@ -0,0 +1,51 @@ +| +i +n +a +k +e +b +u +t +í +m +ɔ +d +y +p +á +l +ŋ +- +s +o +ì +ī +à +f +ǹ +ā +c +ń +j +ú +g +r +̄ +́ +ū +é +ò +w +̃ +è +ē +ù +̀ +ĺ +ḿ +2 +ï +1 +0 + diff --git a/models/bul/G_100000.pth b/models/bul/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b944e5001c2f935d2ad3e22265a4352917e32e52 --- /dev/null +++ b/models/bul/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c891f78426ec5b7827a985a3b7210a76511ff501055ce3862863dc85e40b5f6b +size 145483783 diff --git a/models/bul/config.json b/models/bul/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bul/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bul/vocab.txt b/models/bul/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..0fd96247c45e5b9b8b5bd5735791b7e678eedf2c --- /dev/null +++ b/models/bul/vocab.txt @@ -0,0 +1,37 @@ +п +е +р +– +х +щ +- +м +у +в +ф +ѝ +г +я +к +ц +ю +̀ +и +н +л +з +_ +й +ь +ѐ +о +с +б +ж +д + +ч +ъ +т +а +ш diff --git a/models/bus/G_100000.pth b/models/bus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f2e07c6333096907eeafae1fc932d3af5c2168c --- /dev/null +++ b/models/bus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e3aa5b21a9fe5c77efc1856e88b174ab869d233d7524f4d5513f94da682edb +size 145492215 diff --git a/models/bus/config.json b/models/bus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bus/vocab.txt b/models/bus/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dec7f727ce926a504121d520b32c533c084f8819 --- /dev/null +++ b/models/bus/vocab.txt @@ -0,0 +1,48 @@ +e +h +n +ù +b +z +è +p +g +ã +ǹ +ó +m +s +́ +v +é +k +y +ɛ +i + +̃ +à +a +' +- +̀ +u +ↄ +ĩ +ò +l +á +ḿ +d +t +f +ú +o +ń +w +_ +í +r +ɔ +ũ +ì diff --git a/models/bvc/G_100000.pth b/models/bvc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cdadfae05a508b78c4063c8b4e57199e782c1f8b --- /dev/null +++ b/models/bvc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c49e683f30142084cea53622c2de1601dad2738371517a93379c66032d8ded8 +size 145479903 diff --git a/models/bvc/config.json b/models/bvc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bvc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bvc/vocab.txt b/models/bvc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f0f361e38a9b56ae59941283668f610aa0febe18 --- /dev/null +++ b/models/bvc/vocab.txt @@ -0,0 +1,32 @@ +f +6 +n +s +m +9 +3 +e +g +5 +u +4 +t +r +0 +v +' +l +k +h +d +1 +o +p +a +b +w +j +i +_ +2 + diff --git a/models/bvz/G_100000.pth b/models/bvz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c667d2eadd363220b801066b4fb3990af5d0dbb4 --- /dev/null +++ b/models/bvz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:669bd2fcf4eb332aa11c4e6359228452f74decdd9cca524f41e32802ba4604d1 +size 145476059 diff --git a/models/bvz/config.json b/models/bvz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bvz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bvz/vocab.txt b/models/bvz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24fbf42a1f3719b781711d8128dff848c9575f2e --- /dev/null +++ b/models/bvz/vocab.txt @@ -0,0 +1,27 @@ +v +r +y +m +u +j +c +d + +l +z +b +w +à +k +t +o +e +p +f +_ +s +a +g +h +i +n diff --git a/models/bwq/G_100000.pth b/models/bwq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..14ff7f58e7968149cc14ad41578afbf51e5be6ed --- /dev/null +++ b/models/bwq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00ab915645bf995aed0d31b38603243f4096235137070d94b1343ad9971d605 +size 145479158 diff --git a/models/bwq/config.json b/models/bwq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bwq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bwq/vocab.txt b/models/bwq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b6a2309852fbf889f363badec45eff939039e94b --- /dev/null +++ b/models/bwq/vocab.txt @@ -0,0 +1,31 @@ +| +a +n +ɔ +e +i +ɛ +m +y +k +w +r +t +o +b +g +s +d +u +l +p +f +z +' +h +v +́ +à +ì +- + diff --git a/models/bwu/G_100000.pth b/models/bwu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..61059aaf8b768e703d61989255c47247ec905d93 --- /dev/null +++ b/models/bwu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c663cfb6f08d4071c810f85568da53ece77eaacc6c839ecfbc7fd429335841a +size 145476193 diff --git a/models/bwu/config.json b/models/bwu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bwu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bwu/vocab.txt b/models/bwu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..10049a4fa09b6b4e084621ce5f96b0cb9b84463f --- /dev/null +++ b/models/bwu/vocab.txt @@ -0,0 +1,27 @@ +| +a +i +e +n +m +l +k +y +u +b +t +w +s +o +d +g +r +p +j +h +c +z +f +v +' + diff --git a/models/byr/G_100000.pth b/models/byr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff4903c39b041ce90349d3174a82153db46004e4 --- /dev/null +++ b/models/byr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ee178a95fcea83786910f219c7603436ee270151ae98dc9425578eec578ab2 +size 145483735 diff --git a/models/byr/config.json b/models/byr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/byr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/byr/vocab.txt b/models/byr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..52fe54d42d45c2acfd9b2ffcf4206fbd447089bd --- /dev/null +++ b/models/byr/vocab.txt @@ -0,0 +1,37 @@ +l +6 +m +p +v +z +3 +9 +r +a +o +h +' +j +w +4 +- +1 +g +7 +k +y + +_ +d +n +b +8 +e +ɨ +i +t +u +2 +5 +s +0 diff --git a/models/bzh/G_100000.pth b/models/bzh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7af218420fed4903b2a34be6144c103ee89481f3 --- /dev/null +++ b/models/bzh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d1cbf9b27dafdcc4d17b381b9903288e28048304e7404260c7a53ec1d4e491 +size 145486833 diff --git a/models/bzh/config.json b/models/bzh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bzh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bzh/vocab.txt b/models/bzh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7cb6c97b6537b17a419a84c3102fafeae6e4591c --- /dev/null +++ b/models/bzh/vocab.txt @@ -0,0 +1,41 @@ +s +q +i +— + +k +o +w +j +a +v +e +u +8 +r +6 +- +ö +g +5 +m +d +7 +ë +2 +n +ḳ +l +ġ +' +1 +t +b +9 +y +3 +h +4 +0 +p +_ diff --git a/models/bzi/G_100000.pth b/models/bzi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c34591a110107c87f545b7d881ae2d46f27c10e5 --- /dev/null +++ b/models/bzi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c62518894fba47c63ce1fc00db1a3cf0956d896e457fa5880b6e0ee2fee7ec6 +size 145510663 diff --git a/models/bzi/config.json b/models/bzi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bzi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bzi/vocab.txt b/models/bzi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f0128a53603b70beccf854bcdaddf65cdcd4ece9 --- /dev/null +++ b/models/bzi/vocab.txt @@ -0,0 +1,72 @@ +ี +ึ +่ +ร +ผ +โ +้ +ณ +า +ฺ +ค +q +ศ +7 +เ +ิ +ื +็ +น +4 +_ +- +' +ํ +ภ +ห + +ต +ด +8 +0 +ท +ฮ +ฎ +9 +๋ +1 +m +ง +ล +ม +บ +พ +ไ +ั +ุ +6 +ป +ย +แ +‍ +ฬ +ฟ +2 +ช +3 +ญ +ะ +ข +ู +ฐ +ส +อ +ว +5 +ษ +์ +ธ +ก +ซ +จ +ถ diff --git a/models/bzj/G_100000.pth b/models/bzj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..888aa74eb1ef7e4d846920483994f2949c2c5d23 --- /dev/null +++ b/models/bzj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0d2b503d3b1c3437dae13c1bfc358291accb857a655c2c58ce00a923c400e7d +size 145477599 diff --git a/models/bzj/config.json b/models/bzj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/bzj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/bzj/vocab.txt b/models/bzj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1c60f9eacf1a28cfb9fce395744e1d2b80f76071 --- /dev/null +++ b/models/bzj/vocab.txt @@ -0,0 +1,29 @@ +| +a +n +e +h +i +d +o +s +w +t +l +u +r +k +m +f +p +g +y +b +z +v +j +c +- +' +— + diff --git a/models/caa/G_100000.pth b/models/caa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c37eed278a337ef77586b25f933c0caa36ceafe9 --- /dev/null +++ b/models/caa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb539af45b0c8294bf211c829b88599a74dae1b2c88b33e194e7e41b504aa80 +size 145490003 diff --git a/models/caa/config.json b/models/caa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/caa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/caa/vocab.txt b/models/caa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8b525c390d398c0d03188e397174445d8b380def --- /dev/null +++ b/models/caa/vocab.txt @@ -0,0 +1,45 @@ +| +a +e +u +t +n +i +' +r +o +c +m +y +b +s +j +x +h +p +q +w +l +d +z +g +ú +á +í +— +é +f +v +ó +0 +1 +2 +5 +4 +k +3 +6 +- +9 +7 + diff --git a/models/cab/G_100000.pth b/models/cab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2eb3ed3854fadb5eb77f6a232e74a18fceca640b --- /dev/null +++ b/models/cab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01300f47fb418b77694bef32d7fd097a31d7326af4638ce808bc090536fdcbc5 +size 145480685 diff --git a/models/cab/config.json b/models/cab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cab/vocab.txt b/models/cab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..223fed834bd3cad583f116d8731582331e2dc31f --- /dev/null +++ b/models/cab/vocab.txt @@ -0,0 +1,33 @@ +| +a +u +i +n +l +g +e +r +b +h +d +m +s +ü +t +ñ +á +í +w +f +o +é +k +ú +y +c +ó +p +— +ǘ +- + diff --git "a/models/cac-dialect_sanmateoixtat\303\241n/G_100000.pth" "b/models/cac-dialect_sanmateoixtat\303\241n/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..a6052173d4b462a4c572ca2a3998c106201e214f --- /dev/null +++ "b/models/cac-dialect_sanmateoixtat\303\241n/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca5e728b82fe69857df9cfe73d0f117e68783c7c03d0f6ea263fe3aa5874c7fb +size 145489143 diff --git "a/models/cac-dialect_sanmateoixtat\303\241n/config.json" "b/models/cac-dialect_sanmateoixtat\303\241n/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/cac-dialect_sanmateoixtat\303\241n/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/cac-dialect_sanmateoixtat\303\241n/vocab.txt" "b/models/cac-dialect_sanmateoixtat\303\241n/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..54f72d3adbd0eb8ef83e48a722d9d07f60c6b58c --- /dev/null +++ "b/models/cac-dialect_sanmateoixtat\303\241n/vocab.txt" @@ -0,0 +1,44 @@ +q +1 +b +3 +a +e +c +d +p +n +m +_ +4 +ú +é +ó +h +7 +y +i +g + +t +z +o +' +j +v +0 +8 +x +- +̈ +6 +9 +l +2 +u +s +í +f +5 +á +r diff --git "a/models/cac-dialect_sansebasti\303\241ncoat\303\241n/G_100000.pth" "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..3c8a715cd60a84121f549d6d8c17d114fa5b0fb4 --- /dev/null +++ "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9cf31aae474c2f9c4ab16dafbe4f7e0e7774ad12f2117e47501f2c5fd8fda74 +size 145491413 diff --git "a/models/cac-dialect_sansebasti\303\241ncoat\303\241n/config.json" "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/cac-dialect_sansebasti\303\241ncoat\303\241n/vocab.txt" "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..453816d9779aeea24b5f64f1b00ebfb745aff237 --- /dev/null +++ "b/models/cac-dialect_sansebasti\303\241ncoat\303\241n/vocab.txt" @@ -0,0 +1,47 @@ +| +a +' +c +n +i +t +e +o +j +l +y +u +s +x +b +h +w +m +z +̈ +p +q +r +d +ú +g +— +í +é +v +á +f +ó +- +4 +2 +0 +1 +9 +3 +6 +7 +5 +8 +k + diff --git a/models/cak-dialect_central/G_100000.pth b/models/cak-dialect_central/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43d96627685c94fcec3d4be1be099b40d689f57d --- /dev/null +++ b/models/cak-dialect_central/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec67df82d5d0152c89efa831f3925f266e2b2a8cf3a16e2db7238cb792d73bcf +size 145490017 diff --git a/models/cak-dialect_central/config.json b/models/cak-dialect_central/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cak-dialect_central/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cak-dialect_central/vocab.txt b/models/cak-dialect_central/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e8ac3a0c5b5e898075f80c08de30f4ee411718c2 --- /dev/null +++ b/models/cak-dialect_central/vocab.txt @@ -0,0 +1,45 @@ +| +i +a +r +' +n +k +e +j +o +c +h +u +t +l +q +x +y +m +w +b +s +p +- +z +d +á +ú +í +g +é +f +v +ó +0 +4 +1 +2 +5 +6 +7 +3 +à +ì + diff --git "a/models/cak-dialect_santamar\303\255adejes\303\272s/G_100000.pth" "b/models/cak-dialect_santamar\303\255adejes\303\272s/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..45d2fb749296b2e477519bdcbbcff82878ed95c4 --- /dev/null +++ "b/models/cak-dialect_santamar\303\255adejes\303\272s/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a90869e4e49b91d470577acf7918ba05878ca57a2f2d41c37cac128eac0cc12d +size 145482339 diff --git "a/models/cak-dialect_santamar\303\255adejes\303\272s/config.json" "b/models/cak-dialect_santamar\303\255adejes\303\272s/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/cak-dialect_santamar\303\255adejes\303\272s/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/cak-dialect_santamar\303\255adejes\303\272s/vocab.txt" "b/models/cak-dialect_santamar\303\255adejes\303\272s/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..b27d78655248afdb2864bf2f7d204c95ec3e112a --- /dev/null +++ "b/models/cak-dialect_santamar\303\255adejes\303\272s/vocab.txt" @@ -0,0 +1,35 @@ +| +i +a +' +r +n +c +u +e +o +j +t +h +x +s +m +l +k +y +q +p +v +- +b +d +z +f +á +g +í +ú +é +ó +ñ + diff --git a/models/cak-dialect_santodomingoxenacoj/G_100000.pth b/models/cak-dialect_santodomingoxenacoj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..53e20aad11836cdbd8e4e980884854a40a64fa5d --- /dev/null +++ b/models/cak-dialect_santodomingoxenacoj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c8a28aa5caf67a05e6e8da4a51d8812c3bbea7b9d54c78a2afda228a45f490 +size 145490781 diff --git a/models/cak-dialect_santodomingoxenacoj/config.json b/models/cak-dialect_santodomingoxenacoj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cak-dialect_santodomingoxenacoj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cak-dialect_santodomingoxenacoj/vocab.txt b/models/cak-dialect_santodomingoxenacoj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..499ca83cc3894af358cac4c174d5fc07274c1270 --- /dev/null +++ b/models/cak-dialect_santodomingoxenacoj/vocab.txt @@ -0,0 +1,46 @@ +0 +n +t +é +6 +c +w +ó +í +o +f +a +e +h +_ +l +4 +9 +i +v +q +1 + +g +ñ +- +2 +u +m +3 +5 +z +j +r +p +ꞌ +s +8 +k +d +ú +á +x +y +b +7 diff --git a/models/cak-dialect_southcentral/G_100000.pth b/models/cak-dialect_southcentral/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d054b5777877981823890d103422fc446546545d --- /dev/null +++ b/models/cak-dialect_southcentral/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c2cdebd888a86fa08f82cef2b3098f28c8e602227a11f4d682a810b7e471855 +size 145482207 diff --git a/models/cak-dialect_southcentral/config.json b/models/cak-dialect_southcentral/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cak-dialect_southcentral/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cak-dialect_southcentral/vocab.txt b/models/cak-dialect_southcentral/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b0a46e064b355cb8fb9ba2a713b38efd71239934 --- /dev/null +++ b/models/cak-dialect_southcentral/vocab.txt @@ -0,0 +1,35 @@ +| +i +a +r +n +c +u +' +e +o +j +h +t +l +m +x +s +q +k +y +p +v +- +b +z +d +á +f +g +ú +í +é +ó +ñ + diff --git a/models/cak-dialect_western/G_100000.pth b/models/cak-dialect_western/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fa2a3ab0f7af0ac9929dc457e1757e99d61b02a --- /dev/null +++ b/models/cak-dialect_western/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9a710189bd0bef6a458ac66ae3a6439fa0481877acdccf068e9f472ea8ba455 +size 145482985 diff --git a/models/cak-dialect_western/config.json b/models/cak-dialect_western/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cak-dialect_western/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cak-dialect_western/vocab.txt b/models/cak-dialect_western/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..43dcfbc52feed86b7af6cc9ef7f53b7e4787c412 --- /dev/null +++ b/models/cak-dialect_western/vocab.txt @@ -0,0 +1,36 @@ +| +i +a +c +r +' +e +u +n +h +j +o +t +y +l +x +k +m +q +s +b +w +p +z +d +f +á +ú +í +g +ó +é +v +- +ñ + diff --git a/models/cak-dialect_yepocapa/G_100000.pth b/models/cak-dialect_yepocapa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6aa84116d7e9b6f90823ed5467054235db726cbc --- /dev/null +++ b/models/cak-dialect_yepocapa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82fb08b11be186f59b1c9c43be153f16c53933e21d48315b29692144e14da17 +size 145477509 diff --git a/models/cak-dialect_yepocapa/config.json b/models/cak-dialect_yepocapa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cak-dialect_yepocapa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cak-dialect_yepocapa/vocab.txt b/models/cak-dialect_yepocapa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..012e5c491b6739aea7b935e535e0f32e71cbec7d --- /dev/null +++ b/models/cak-dialect_yepocapa/vocab.txt @@ -0,0 +1,29 @@ +| +a +e +i +n +c +r +' +u +o +j +h +t +y +m +x +s +k +l +q +b +- +p +v +z +d +f +g + diff --git a/models/cap/G_100000.pth b/models/cap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..80198f8522984344cd169ebb4e96aa5f7de01a10 --- /dev/null +++ b/models/cap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:733bacd2fc9409f16c7cd09ada71768d4567457b670635e004a48f52b7569a78 +size 145489235 diff --git a/models/cap/config.json b/models/cap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cap/vocab.txt b/models/cap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b219601e3afbbe7ebd1cfc41592fd34fd01b7fef --- /dev/null +++ b/models/cap/vocab.txt @@ -0,0 +1,44 @@ +a +| +i +n +h +k +t +ś +ź +u +l +ć +q +e +c +p +s +m +r +o +w +x +ñ +y +j +ï +ö +ä +' +d +– +ë +b +f +ü +í +é +g +z +ú +v +ó +á + diff --git a/models/car/G_100000.pth b/models/car/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e9aff414b5419941942060eb0fc439fd6dc3603 --- /dev/null +++ b/models/car/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ef45a01a6cb68cec482eb263f4d4047a52b17fc3c7e71898b6454a60515eec8 +size 145476177 diff --git a/models/car/config.json b/models/car/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/car/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/car/vocab.txt b/models/car/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..daaab71a62a776f575d616fc634bab37c1f0d556 --- /dev/null +++ b/models/car/vocab.txt @@ -0,0 +1,27 @@ +| +o +a +n +m +r +' +y +k +e +t +i +p +u +w +s +j +b +d +g +l +f +h +- +v +z + diff --git a/models/cas/G_100000.pth b/models/cas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1b1415f36853f0724f7383924a1f4de2d5e0286b --- /dev/null +++ b/models/cas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c1ad73c09685bcc411f3740af6bbd016944105524200a065c6db829194414fb +size 145500670 diff --git a/models/cas/config.json b/models/cas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cas/vocab.txt b/models/cas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dd7b4839455a664e039877eb3fa347c03ba98718 --- /dev/null +++ b/models/cas/vocab.txt @@ -0,0 +1,59 @@ +o +m +ñ +n + +t +ÿ +ạ +ụ +ẹ +f +s +z +l +- +4 +í +á +é +q +x +h +e +p +9 +5 +i +ä +0 +́ +j +r +̂ +3 +ọ +7 +â +6 +— +8 +ị +ĉ +a +ŝ +' +ú +1 +̣ +u +_ +b +g +̈ +c +2 +v +d +y +ó diff --git a/models/cat/G_100000.pth b/models/cat/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1629b8db7ec66068f24ba46b6b8d9f6b9f3ab536 --- /dev/null +++ b/models/cat/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b4c2b1d7df876f0bad4242c809bbb2b61b0bd132ea980aacb4252e0711203c +size 145485419 diff --git a/models/cat/config.json b/models/cat/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cat/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cat/vocab.txt b/models/cat/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..5dab4cf295eb67fc7880288b7dcc1a5292975942 --- /dev/null +++ b/models/cat/vocab.txt @@ -0,0 +1,39 @@ +z +f +i +g +m +o +r +è +h +l +v +à +u +d +ú +ç +p +s +' +é +_ +- +e +a +— +x +ü +q +t +b +í +ó +ï +ò + +c +j +n +y diff --git a/models/cax/G_100000.pth b/models/cax/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d0e7f3e5c11f9a3fb12c55c5396381af2d42b4d --- /dev/null +++ b/models/cax/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:805119a66a273b1ac2c5651f8f84a2c34f072323080e8165460c7bd4936465ad +size 145482985 diff --git a/models/cax/config.json b/models/cax/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cax/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cax/vocab.txt b/models/cax/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..74c95d48ec28098e31deb43bf460f5f5ea4f70d0 --- /dev/null +++ b/models/cax/vocab.txt @@ -0,0 +1,36 @@ +a +| +i +n +t +ɨ +u +o +k +m +x +e +s +p +r +b +h +y +ñ +c +j +l +ú +d +í +– +g +é +' +f +ó +á +v +z +q + diff --git a/models/cbc/G_100000.pth b/models/cbc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3915ffba6e7033f2cd03130d34a996b54535e91c --- /dev/null +++ b/models/cbc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b3cd09747b95145b0e66fe8549fe5c4b5a77de858feac208f0b1d354f2358c +size 145488381 diff --git a/models/cbc/config.json b/models/cbc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbc/vocab.txt b/models/cbc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b638d5205dce02f7de2124a65f4bbcdb50e9d50 --- /dev/null +++ b/models/cbc/vocab.txt @@ -0,0 +1,43 @@ +| +a +r +i +u +̶ +c +o +e +ã +m +t +n +ũ +j +p +b +ĩ +s +y +ẽ +ñ +g +q +õ +á +w +d +l +ú +í +ó +- +é +— +f +h +z +v +x +k +' + diff --git a/models/cbi/G_100000.pth b/models/cbi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa1803698674925603ce91ffcfbcad9e1fb9044a --- /dev/null +++ b/models/cbi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957e5a7842e2d83c408f29ee4b281a784b68b3f1357b3cfa33700b9b1d1e391c +size 145491423 diff --git a/models/cbi/config.json b/models/cbi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbi/vocab.txt b/models/cbi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..12c7e213219d24740b6179ac0a18fcf77f489fc9 --- /dev/null +++ b/models/cbi/vocab.txt @@ -0,0 +1,47 @@ +j +w +t +q +c +v +2 +á +z +1 +4 +8 +_ + +l +5 +ú +k +ü +0 +d +9 +i +h +s +í +é +e +ñ +n +g +u +r +b +m +6 +f +o +p +3 +- +ó +y +' +7 +x +a diff --git a/models/cbr/G_100000.pth b/models/cbr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..66e03dda76f441a579e3189b1e78b923a1aba815 --- /dev/null +++ b/models/cbr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dbda7e23c5ac4b3375fc08b2829c525a239abe7c70bbc23b84ba958db20a0b8 +size 145484427 diff --git a/models/cbr/config.json b/models/cbr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbr/vocab.txt b/models/cbr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..41de50dc327dd4c8b022be1aaf23d3d1e5fa62b6 --- /dev/null +++ b/models/cbr/vocab.txt @@ -0,0 +1,38 @@ +ñ +t +x +b +é +a +ú +́ +k +i +q +d +h + +— +r +' +y +á +ó +g +m +f +u +p +ü +l +s +ë +e +j +n +í +v +z +o +_ +c diff --git a/models/cbs/G_100000.pth b/models/cbs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9128a35dfbef757521dabefbef01371df1090f62 --- /dev/null +++ b/models/cbs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26f667a7935288c97e10d66fbb5ebc4adf1133a6a3670f871f276695b548a781 +size 145490693 diff --git a/models/cbs/config.json b/models/cbs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbs/vocab.txt b/models/cbs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50452203e9d0248432f501acfc158fe40a626495 --- /dev/null +++ b/models/cbs/vocab.txt @@ -0,0 +1,46 @@ +a +| +n +i +u +k +e +b +s +j +t +m +d +w +y +h +p +x +c +o +r +– +l +ú +é +í +g +0 +f +á +v +ó +z +1 +2 +q +7 +4 +' +ñ +5 +3 +6 +9 +8 + diff --git a/models/cbt/G_100000.pth b/models/cbt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5357fda48d2fd91b87022111446f0548605ae8ca --- /dev/null +++ b/models/cbt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa5953456e232fe3d0f8fbf6fbc4dc336d8f6634d47bb5e7b69d363f3649193 +size 145477645 diff --git a/models/cbt/config.json b/models/cbt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbt/vocab.txt b/models/cbt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..876a416f3240b98a7a1afa721f73ec015086d125 --- /dev/null +++ b/models/cbt/vocab.txt @@ -0,0 +1,29 @@ +a +n +| +i +o +ë +t +h +u +ꞌ +p +r +c +s +y +q +m +' +á +í +— +ó +d +l +e +́ +f +b + diff --git a/models/cbu/G_100000.pth b/models/cbu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebf00435b4217b0a34b8f2c70be5ed5f3ac5f91a --- /dev/null +++ b/models/cbu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:703f05326c922a54f2d421fefc04b22e19c31ddd2f0ca03405b4e82b1732c35b +size 145489143 diff --git a/models/cbu/config.json b/models/cbu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbu/vocab.txt b/models/cbu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ee0f60bd9e1338d5db93f839a9f1bb52d8500287 --- /dev/null +++ b/models/cbu/vocab.txt @@ -0,0 +1,44 @@ +a +| +i +n +s +t +u +r +h +p +m +y +k +e +g +c +l +w +o +z +á +d +b +' +j +í +x +ú +ó +f +0 +1 +v +2 +é +4 +5 +6 +3 +- +7 +9 +8 + diff --git a/models/cbv/G_100000.pth b/models/cbv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..94baf4ebcef2f5420167ddd792ebe8942fba29a9 --- /dev/null +++ b/models/cbv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5da1eb49c671a67a7ff1c1486be8a6012380a6e37f42d1b4ca8cfa6201c8834f +size 145491445 diff --git a/models/cbv/config.json b/models/cbv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cbv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cbv/vocab.txt b/models/cbv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7d0b91843f97cd561a5669b71184026d25f7952b --- /dev/null +++ b/models/cbv/vocab.txt @@ -0,0 +1,47 @@ +| +a +i +h +n +t +j +ʉ +e +c +á +d +p +b +o +w +ã +í +m +́ +ĩ +é +y +s +ó +ñ +u +r +̃ +g +l +q +ú +ẽ +' +— +f +v +z +õ +x +k +ò +ç +4 +1 + diff --git a/models/cce/G_100000.pth b/models/cce/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf957afab22a3616b2860ec32d2492ecd978ec6c --- /dev/null +++ b/models/cce/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be7a2001e2398d6c1c02c4d5e6a95ea4b78f2d4be601aefee6bd0e651989fe42 +size 145479147 diff --git a/models/cce/config.json b/models/cce/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cce/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cce/vocab.txt b/models/cce/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a0d617da898379efc3ef85442e1d7a8d7ee8610a --- /dev/null +++ b/models/cce/vocab.txt @@ -0,0 +1,31 @@ +| +a +i +n +u +t +k +e +o +m +h +w +s +l +g +v +d +x +y +b +r +z +p +f +j +' +ñ +- +é +` + diff --git a/models/cco/G_100000.pth b/models/cco/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e2184834b995b8d51e692876d069c5aaf77417f --- /dev/null +++ b/models/cco/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:112bbb87a75a46fa778c7fda282b4030677156ef54feb71c73ddc9067423295b +size 145491461 diff --git a/models/cco/config.json b/models/cco/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cco/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cco/vocab.txt b/models/cco/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..442304f9b1c2fb5516eda7a8b5bca4a1b26c8d11 --- /dev/null +++ b/models/cco/vocab.txt @@ -0,0 +1,47 @@ +| +a +' +i +e +n +j +̱ +ˋ +ɨ +o +g +u +l +s +d +c +ˉ +̃ +ˊ +m +é +á +í +f +́ +r +q +b +t +ˆ +ʉ +ñ +ó +ú +ø +ǿ +ü +y +h +— +p +z +v +x +k + diff --git a/models/cdj/G_100000.pth b/models/cdj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9a666a7a313d2a8f8f597c72352710da8cd739a --- /dev/null +++ b/models/cdj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd98b3055b892259a6e55fd91020f3034473791ce2ce03dd192e41c237f280a +size 145506025 diff --git a/models/cdj/config.json b/models/cdj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cdj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cdj/vocab.txt b/models/cdj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..270abca6ede2111556c14969c48b5e68c1d5ec68 --- /dev/null +++ b/models/cdj/vocab.txt @@ -0,0 +1,66 @@ +| +ा +र +ै +ि +त +ं +क +् +ी +न +स +े +म +य +ु +ह +ज +प +व +ण +ल +ब +श +अ +ई +द +ड +ग +ू +ो +़ +ऐ +भ +आ +च +उ +झ +थ +ख +ौ +ध +छ +ठ +ट +फ +- +ष +घ +ए +इ +ँ +ढ +ञ +ऊ +औ +‍ +ृ +ओ +ः +7 +5 +' +3 +0 + diff --git a/models/ceb/G_100000.pth b/models/ceb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddfa9876f7302e4af9cdd2354671e66bbe714d32 --- /dev/null +++ b/models/ceb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af8bef76b179d13524752a1597207cab9868663e383fe0cfca6dffd47037c47a +size 145486725 diff --git a/models/ceb/config.json b/models/ceb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ceb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ceb/vocab.txt b/models/ceb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bb8f031ff4cea6fe0ad67a975839ab8ab87115d4 --- /dev/null +++ b/models/ceb/vocab.txt @@ -0,0 +1,41 @@ +0 +j +z +d +6 +s +g +w +c +- +' +9 +u +v +r +2 +e +q +5 +t +h +— +8 +1 +a +p +x + +_ +l +n +m +4 +i +k +3 +o +7 +b +f +y diff --git a/models/ceg/G_100000.pth b/models/ceg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3134fbab5d932bfa7ef2522c9c5913b378027ce5 --- /dev/null +++ b/models/ceg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882cd08fff848942fdf727f14034fe8c07e5e0b80a5c82c2446588c863670b6c +size 145489885 diff --git a/models/ceg/config.json b/models/ceg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ceg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ceg/vocab.txt b/models/ceg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9dda0e97a315e703bee8396f0ba66181182e17fe --- /dev/null +++ b/models/ceg/vocab.txt @@ -0,0 +1,45 @@ +| +e +o +h +r +ɨ +a +s +t +u +k +i +y +n +j +p +c +l +m +w +õ +d +b +z +ĩ +̵ +ũ +ã +ẽ +— +ú +g +é +f +í +ó +á +v +q +̃ +- +x +ñ +' + diff --git a/models/cek/G_100000.pth b/models/cek/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfcdc119d24f0216fe7f3dfe5727f42dee0fc618 --- /dev/null +++ b/models/cek/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24463432fcf863cb5006c87d4246e6b7c913766908e321141805c55afdbb6e26 +size 145483773 diff --git a/models/cek/config.json b/models/cek/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cek/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cek/vocab.txt b/models/cek/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..995a8f03daf5f83ed178b19147f17b7e147fe8b0 --- /dev/null +++ b/models/cek/vocab.txt @@ -0,0 +1,37 @@ +r +q +x +k +n +b +j +f +g +' +z +p +3 +ä +s +h +a +w +1 +e +ü +ö +2 +m +d +0 +t +_ +c +o +y +l +i +- +u + +v diff --git a/models/cfm/G_100000.pth b/models/cfm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb224b5f2f9ad2ca85377e12d322b734e7b66c3e --- /dev/null +++ b/models/cfm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1e3d913a7117b832c467af4971ca43c82d77a8683799abb59a56e286976eaa +size 145481463 diff --git a/models/cfm/config.json b/models/cfm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cfm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cfm/vocab.txt b/models/cfm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4adfa61ba1d9506ecc181e02418e37f49e3ae58f --- /dev/null +++ b/models/cfm/vocab.txt @@ -0,0 +1,34 @@ +| +a +n +h +i +u +t +l +k +m +e +g +s +r +p +c +w +o +d +z +b +ò +f +v +j +- +y +x +q +' +2 +­ +1 + diff --git a/models/cgc/G_100000.pth b/models/cgc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e3fd528d25dc9de669fd6da81ffa1aac80a22d6 --- /dev/null +++ b/models/cgc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2abb0205ea459ce6bad34ab85c0f781d2aba8ad2d408b3f580120717feebfc +size 145479045 diff --git a/models/cgc/config.json b/models/cgc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cgc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cgc/vocab.txt b/models/cgc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..567755918e34f477a7bdb468cb1e812d087c3b90 --- /dev/null +++ b/models/cgc/vocab.txt @@ -0,0 +1,31 @@ +j +- +b +k +d +' +m +x +f +n +p +e +u +s +z +v +o +c +l +y + +r +i +6 +t +_ +w +q +h +g +a diff --git a/models/che/G_100000.pth b/models/che/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7afff42450a217055fb355fdbcf56a133425b878 --- /dev/null +++ b/models/che/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:268bbd7014c061c48e08c633862ad9f762b1b9d656bbee5597354d4b7ec7e30b +size 145482973 diff --git a/models/che/config.json b/models/che/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/che/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/che/vocab.txt b/models/che/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a5251f6f291a341c3a953d39e3952a56207c6481 --- /dev/null +++ b/models/che/vocab.txt @@ -0,0 +1,36 @@ +р +ж +о +э +- +ц +т +х +ч +' +и +ю +м +б +_ +у +з +е +с +ӏ +г +л + +а +я +в +ъ +– +ш +н +й +ф +ь +д +к +п diff --git a/models/chf/G_100000.pth b/models/chf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e07e9ef5b796017b1dc15300e5e31c5a77014ce --- /dev/null +++ b/models/chf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98ba1c033cd57136065c4f8db56037fb50d8184499b6821c037a0ee6fd1b0cb2 +size 145483101 diff --git a/models/chf/config.json b/models/chf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/chf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/chf/vocab.txt b/models/chf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4824452b21a4239d5fa3dbe75bdd54aba22a3479 --- /dev/null +++ b/models/chf/vocab.txt @@ -0,0 +1,36 @@ +k +m +g +_ +d +w +h +ó +ñ +z +' +i +j +q +s +t +b +p +e +l +é +r +o +í +ú +ä +u +v +c +f + +á +n +y +x +a diff --git a/models/chv/G_100000.pth b/models/chv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6870938f0c08bbc68cf583820afcdef5de7a11e3 --- /dev/null +++ b/models/chv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7707781c7bb1aead3c0a49d0fbbf8da10fdc749accb7f44c6858910927d019de +size 145485275 diff --git a/models/chv/config.json b/models/chv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/chv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/chv/vocab.txt b/models/chv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a64635e10fd2f7cc8775709483abbb8ac8b819b5 --- /dev/null +++ b/models/chv/vocab.txt @@ -0,0 +1,39 @@ +| +а +н +е +р +ӑ +т +ӗ +с +л +п +и +у +м +х +к +ҫ +в +ч +ш +ы +й +э +ӳ +о +- +я +– +ь +ю +д +г +ф +з +щ +б +ж +ц + diff --git a/models/chz/G_100000.pth b/models/chz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8b0ad66fbb46900bd180dbfd1ea506ccbc36d4f --- /dev/null +++ b/models/chz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36cfaa7990573bc030cb7e66dd2c422b004706aeb256d574c0e783293327fdfa +size 145494503 diff --git a/models/chz/config.json b/models/chz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/chz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/chz/vocab.txt b/models/chz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6bb6023ef4fac8039ef43bd3b9ac5f5bbccc32ae --- /dev/null +++ b/models/chz/vocab.txt @@ -0,0 +1,51 @@ +| +a +h +̱ +ˉ +e +j +ꜙ +i +ˊ +ä +n +l +ꜗ +o +ꜘ +k +m +y +ë +s +g +d +u +ɨ +ˈ +t +w +c +ø +ʉ +b +r +ñ +ˋ +̃ +p +ꜚ +ú +— +é +í +ó +á +f +ʼ +z +v +q +x + diff --git a/models/cjo/G_100000.pth b/models/cjo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9ca62a42a7292750622c7c198ef23ebd730f7bd --- /dev/null +++ b/models/cjo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a34ea4c203964f4656426042c654364760c0cdf71d34c393981d0a80b6cc0b +size 145482999 diff --git a/models/cjo/config.json b/models/cjo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cjo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cjo/vocab.txt b/models/cjo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ce440a52eb59f023e022ca8b79df933f2c02a1f --- /dev/null +++ b/models/cjo/vocab.txt @@ -0,0 +1,36 @@ +e +1 +c +3 +o +s +n +h +_ +0 +4 +2 +k +— +6 +j +u +z +p +5 +m +g +a +v +w +ñ +9 + +r +t +8 +y +i +- +7 +' diff --git a/models/cjp/G_100000.pth b/models/cjp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc9fe07284fe65c600fc963450a3dff6d0b12900 --- /dev/null +++ b/models/cjp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf8e4fbfb6c69b06d836cd0043218dfab468a0242a94be7d531f9dee79baeb3 +size 145488481 diff --git a/models/cjp/config.json b/models/cjp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cjp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cjp/vocab.txt b/models/cjp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b973d39307a299de638f718881bc71879eedc82 --- /dev/null +++ b/models/cjp/vocab.txt @@ -0,0 +1,43 @@ +| +a +i +k +e +̱ +ä +s +j +t +w +r +l +n +b +u +y +m +o +h +d +ñ +p +c +ö +á +à +í +ë +ú +é +— +g +v +f +z +ó +q +' +x +è +­ + diff --git a/models/cjs/G_100000.pth b/models/cjs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..650b8475fe41746cc4ddabff20eba6c7b2fa38e7 --- /dev/null +++ b/models/cjs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649ed56657e34e0806ed145fe3d95dfd92412973a040c9c24367473dd28ee022 +size 145485283 diff --git a/models/cjs/config.json b/models/cjs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cjs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cjs/vocab.txt b/models/cjs/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f9a57891af534ba1b7925ebd28fbffbc04c34ad --- /dev/null +++ b/models/cjs/vocab.txt @@ -0,0 +1,39 @@ +| +а +е +и +р +н +ы +т +л +п +д +с +о +қ +ч +ғ +к +ң +м +у +й +з +б +г +– +ӱ +ш +ӧ +ж +э +- +ф +в +я +ё +х +ь +ц + diff --git a/models/cko/G_100000.pth b/models/cko/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..35de5066abb9004ea3cfae475deaebf24b38508b --- /dev/null +++ b/models/cko/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ca2c20446ecdb290a28754e73e78f619ec7864585d231cef3d8e009dd20ce4 +size 145483015 diff --git a/models/cko/config.json b/models/cko/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cko/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cko/vocab.txt b/models/cko/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b581fb2225b988dd3f1f98c134f0dbe25b93001d --- /dev/null +++ b/models/cko/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +n +m +ɛ +u +b +k +r +o +ɔ +s +y +e +t +d +w +f +ŋ +g +á +j +l +p +h +c +z +v +ó +í +ú +́ +é +' + diff --git a/models/ckt/G_100000.pth b/models/ckt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e3ea07fdc2bc43bbd4c193cf7b6dc6fa3ded82d --- /dev/null +++ b/models/ckt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:946dd54af4afd9ad048073e1efd7005c98812456fef0ab96f2a8df198d7f3847 +size 145485317 diff --git a/models/ckt/config.json b/models/ckt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ckt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ckt/vocab.txt b/models/ckt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c274c4d49e5855cd0bdcc0e29d8a3232809dcc06 --- /dev/null +++ b/models/ckt/vocab.txt @@ -0,0 +1,39 @@ +ы +| +н +э +т +а +и +к +р +ԓ +м +г +в +ъ +ӈ +ӄ +о +у +ь +ч +е +й +п +ё +с +я +- +– +ю +л +' +д +ф +з +х +б +ж +ц + diff --git a/models/cla/G_100000.pth b/models/cla/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3afdf8bdfcd63dad743c7f59cba8e68956663fc6 --- /dev/null +++ b/models/cla/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97c2b81fe4a22e2b2c08264a3e8183f2fcd6480c62572c140928c44dc8ee7cc +size 145479913 diff --git a/models/cla/config.json b/models/cla/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cla/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cla/vocab.txt b/models/cla/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0d7ed157688845e34cb370a608d8cf66f794b520 --- /dev/null +++ b/models/cla/vocab.txt @@ -0,0 +1,32 @@ +| +a +i +m +s +n +t +u +e +k +h +w +l +y +o +f +g +r +ɓ +d +ɗ +' +b +p +c +j +z +v +- +2 +0 + diff --git a/models/cle/G_100000.pth b/models/cle/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac1f4d8fdd92f1e0add85a3b625a50c5d7117c6f --- /dev/null +++ b/models/cle/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9769e65c2e05dd3a8052a9de8b2e5fb80a25e20cfd3282d23dbc637c07637df +size 145489145 diff --git a/models/cle/config.json b/models/cle/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cle/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cle/vocab.txt b/models/cle/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b3bc98d4ec1eebbe14a8a57af7426881a82c6e33 --- /dev/null +++ b/models/cle/vocab.txt @@ -0,0 +1,44 @@ +| +3 +h +a +i +4 +2 +á +̱ +ɨ +j +d +y +c +e +n +l +s +u +m +x +1 +g +o +í +é +b +́ +t +ú +f +ñ +ó +r +v +p +ŋ +ü +z +q +k +- +' + diff --git a/models/cly/G_100000.pth b/models/cly/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..81352d653ec19a3a3045b737fd508eaae6e11e33 --- /dev/null +++ b/models/cly/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff1be6016ba73346878a42af008eb6e252d390203ad28e836bd7fe621dd2f6a +size 145484385 diff --git a/models/cly/config.json b/models/cly/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cly/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cly/vocab.txt b/models/cly/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..74ce798610b514cccb8c3334df768848e93ee6c9 --- /dev/null +++ b/models/cly/vocab.txt @@ -0,0 +1,38 @@ +q + +z +p +— +j +m +g +f +e +b +l +n +ó +t +é +_ +- +ú +o +ü +y +x +' +u +i +h +s +á +c +d +r +a +ñ +́ +í +v +k diff --git a/models/cme/G_100000.pth b/models/cme/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7f818cfb4fb04766851d44faa68039a91231844 --- /dev/null +++ b/models/cme/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5981cde33e65b2070a7cdc657ba6131e6e385eaf018be319d7dc31bf57b56fb +size 145482991 diff --git a/models/cme/config.json b/models/cme/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cme/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cme/vocab.txt b/models/cme/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ceeaac090f65e89f7611cee4eb7cd7fcfb78264 --- /dev/null +++ b/models/cme/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +u +ŋ +n +m +b +e +o +l +ɔ +- +d +ɛ +y +r +s +k +‐ +g +t +ã +h +c +w +f +ĩ +p +j +ũ +̃ +' +v +— + diff --git a/models/cmo-script_khmer/G_100000.pth b/models/cmo-script_khmer/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f0fe157d3e2f0d1119c47c9a1d9a29d2725fc6e --- /dev/null +++ b/models/cmo-script_khmer/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e25c92e81a41338a61e2b13a165e4776b8bbd903c478d3573943927224577242 +size 145498453 diff --git a/models/cmo-script_khmer/config.json b/models/cmo-script_khmer/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cmo-script_khmer/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cmo-script_khmer/vocab.txt b/models/cmo-script_khmer/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c7972883723a250ce2c193a652908fd3eec0e788 --- /dev/null +++ b/models/cmo-script_khmer/vocab.txt @@ -0,0 +1,56 @@ +ច +៨ +៦ +ឺ +ខ +ែ +ើ +ី +ទ +ា +ញ +ឞ +ូ +វ +ន +អ +ឆ +១ +០ +៩ +៤ +៥ +២ +រ +ោ +ៈ +ឹ +ម +ស +ផ +ប +៧ +ដ +់ +ៀ +_ +គ +៣ +ព + +យ +- +ល +ក +្ +ង +ជ +ត +៝ +េ +ៅ +ហ +ុ +ិ +ថ +ៃ diff --git a/models/cmo-script_latin/G_100000.pth b/models/cmo-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..efe8aa0447a1e97bb8256f7fee2d511ed0c72771 --- /dev/null +++ b/models/cmo-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0bd3d5cee7fea50355fd29422fdf2d2d679d72e5538d0cfecf62feae33c0fec +size 145492187 diff --git a/models/cmo-script_latin/config.json b/models/cmo-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cmo-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cmo-script_latin/vocab.txt b/models/cmo-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..89e2ad9671bbae4f2b68eddf68814ac7288229a5 --- /dev/null +++ b/models/cmo-script_latin/vocab.txt @@ -0,0 +1,48 @@ +| +n +a +h +g +m +r +u +â +k +i +t +ă +y +l +p +b +ơ +d +ô +ê +j +e +o +s +ŏ +ŭ +đ +c +ĕ +ƀ +' +̆ +v +ĭ +0 +ư +1 +- +2 +4 +5 +3 +6 +7 +9 +8 + diff --git a/models/cmr/G_100000.pth b/models/cmr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e47ecd1652ea026d8775e9f848ffd4682ce39038 --- /dev/null +++ b/models/cmr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc11993fecc971213f35ee6527508d966372fffcc51fa937fabe3dfa5c629d1e +size 145482207 diff --git a/models/cmr/config.json b/models/cmr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cmr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cmr/vocab.txt b/models/cmr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a445d47870d58adc3e5b074c072ae2c22f9ca05b --- /dev/null +++ b/models/cmr/vocab.txt @@ -0,0 +1,35 @@ +k +o + +j +0 +v +i +z +q +p +d +r +w +f +6 +e +y +' +u +- +a +s +3 +h +b +m +n +1 +l +_ +2 +g +4 +x +t diff --git a/models/cnh/G_100000.pth b/models/cnh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6df59322550cefbbdd4b419d6fdf526c3b4e28d5 --- /dev/null +++ b/models/cnh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8491a622eb99c25506cb30e559b68ca4fa87308e86422203a745debb06c37c1b +size 145478493 diff --git a/models/cnh/config.json b/models/cnh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cnh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cnh/vocab.txt b/models/cnh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..78856966f84d3ae77b47589db031841ed71ce090 --- /dev/null +++ b/models/cnh/vocab.txt @@ -0,0 +1,30 @@ +i +n +e +f +p +g +w +m +a + +o +j +t +y +- +_ +l +z +' +k +c +ṭ +— +b +s +v +d +u +h +r diff --git a/models/cni/G_100000.pth b/models/cni/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea5f46c297c9417b76c4ba9057a8a40a0a52577b --- /dev/null +++ b/models/cni/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca89a6a678eb8adfcf6fb981af7fea2a9133b17ba050daf8a106c445f055db6 +size 145479165 diff --git a/models/cni/config.json b/models/cni/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cni/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cni/vocab.txt b/models/cni/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fcdcaab81aabb9c323592cc70a105e6c01d7a308 --- /dev/null +++ b/models/cni/vocab.txt @@ -0,0 +1,31 @@ +a +i +| +e +t +n +r +o +s +c +p +q +u +m +j +h +v +y +- +0 +' +1 +2 +7 +4 +6 +5 +8 +3 +9 + diff --git a/models/cnl/G_100000.pth b/models/cnl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8677ef791883e9d89a8b8e07c69c1de316d795f1 --- /dev/null +++ b/models/cnl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb60d0548da4ef825793f0254a72116342ebb70c670ff832aa96cb06a3d06ca +size 145489901 diff --git a/models/cnl/config.json b/models/cnl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cnl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cnl/vocab.txt b/models/cnl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6c7d3c3801cc2930898722d0dc2cd77c95fb527c --- /dev/null +++ b/models/cnl/vocab.txt @@ -0,0 +1,45 @@ +1 + +y +v +r +á +z +k +ñ +ó +j +́ +n +s +_ +e +é +í +u +ŋ +ʉ +ü +h +' +g +i +̱ +a +t +x +ǿ +c +ɨ +b +q +o +ú +f +2 +ø +m +d +p +3 +l diff --git a/models/cnt/G_100000.pth b/models/cnt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..08822e8e32b90e45127ddd9c574635cd32696676 --- /dev/null +++ b/models/cnt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17c8e1a6a3059991f49e3248ecab40f55132272cf762e9192349d8003032e36d +size 145490005 diff --git a/models/cnt/config.json b/models/cnt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cnt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cnt/vocab.txt b/models/cnt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..327ba952b47338e1293e58a4a30a547ad7b7abbd --- /dev/null +++ b/models/cnt/vocab.txt @@ -0,0 +1,45 @@ +| +2 +a +' +i +n +j +u +4 +5 +3 +g +e +s +c +á +ɨ +1 +l +o +d +ë +m +t +́ +q +ó +é +ŋ +b +r +h +ú +í +p +ü +- +f +v +z +y +k +x +ñ + diff --git a/models/coe/G_100000.pth b/models/coe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb5cb1d0348fb1ea587167f25655c04c6785aff8 --- /dev/null +++ b/models/coe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd1dcc8c50d8be07e1af8c5c837bafc6908e23df0efb34ee83cece8efa377a4 +size 145487615 diff --git a/models/coe/config.json b/models/coe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/coe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/coe/vocab.txt b/models/coe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f10cbd5f129758f86e7a768d218b3797cb6f59a7 --- /dev/null +++ b/models/coe/vocab.txt @@ -0,0 +1,42 @@ +a +| +ʉ +e +' +i +c +o +h +n +r +j +s +m +p +k +u +t +ã +ñ +v +ũ +d +̃ +ẽ +ĩ +— +l +õ +ú +í +b +g +é +f +á +q +ó +z +y +x + diff --git a/models/cof/G_100000.pth b/models/cof/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ee4cdf60b356430f980e116382a15d3873b2bef --- /dev/null +++ b/models/cof/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84daca840b3c64c46c4deef4feaf375b35c05deb14652ef7c7f71484e8e588c8 +size 145481479 diff --git a/models/cof/config.json b/models/cof/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cof/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cof/vocab.txt b/models/cof/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..444dd18c775c454ca8e99dfb1975e934a3b453d1 --- /dev/null +++ b/models/cof/vocab.txt @@ -0,0 +1,34 @@ +a +p +r +í +i +o +v +t +ñ + +é +f +j +g +ó +á +ú +m +c +h +y +d +z +b +n +u +l +x +q +— +w +_ +s +e diff --git a/models/cok/G_100000.pth b/models/cok/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..60caa39813410209ceab9118e6bbefe501adfdb3 --- /dev/null +++ b/models/cok/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11143eddb607d5ab6b7719af9c2709ccd5696a1c261037e132887fc989122ba3 +size 145483789 diff --git a/models/cok/config.json b/models/cok/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cok/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cok/vocab.txt b/models/cok/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b29d7dbd4e1b3146739b401658ac44194905f8a2 --- /dev/null +++ b/models/cok/vocab.txt @@ -0,0 +1,37 @@ +d +r +' +ñ +l +ú +a +h +x +b +m +- +c +ɨ +g +í +p +n +j +u +é +́ +o +t +ó + +q +á +s +i +v +z +f +k +y +e +_ diff --git a/models/con/G_100000.pth b/models/con/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6682d5b3eae4c4210af9e3ebdf5940c2333e062 --- /dev/null +++ b/models/con/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478cc1dfc85a610c6a11e615e9e9039d7c5fb5c7e99f6931d8788468fd386280 +size 145483753 diff --git a/models/con/config.json b/models/con/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/con/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/con/vocab.txt b/models/con/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..206db65592c40f3f778482bc6316f7dd024142fd --- /dev/null +++ b/models/con/vocab.txt @@ -0,0 +1,37 @@ +a +| +e +i +n +s +' +c +t +u +o +m +j +q +h +g +p +f +d +y +ñ +b +v +r +ú +l +— +é +- +z +á +í +ó +x +k +ḿ + diff --git a/models/cot/G_100000.pth b/models/cot/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..88ced7cf0795e044251465f4449470cd63d0c5d5 --- /dev/null +++ b/models/cot/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47fc92aa4defa211e4679d175ecd2af5fa9ab518f6da016bf434197ee9ac9ce +size 145487823 diff --git a/models/cot/config.json b/models/cot/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cot/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cot/vocab.txt b/models/cot/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..79dcd07bb8f3781220293c64019ae432697276f4 --- /dev/null +++ b/models/cot/vocab.txt @@ -0,0 +1,42 @@ +a +7 +i +r +y +5 +o +q +9 +j +u +ó +e +_ +4 +p +b +' +v +3 +é +g +2 +l +m +0 +8 +x +s +— +z +f +ñ + +6 +1 +í +n +c +h +t +d diff --git a/models/cou/G_100000.pth b/models/cou/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..df92a20c47f840fb27ffe664c31b7ec36e02f76d --- /dev/null +++ b/models/cou/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d44c8e8cef180951cd200c0c322aa7766b2c6550b915b2fc2fa74b8d9101f9a2 +size 145483615 diff --git a/models/cou/config.json b/models/cou/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cou/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cou/vocab.txt b/models/cou/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fb7ac7317698f15b06f36bd2215c95e8f55ddf8d --- /dev/null +++ b/models/cou/vocab.txt @@ -0,0 +1,37 @@ +| +a +ë +n +h +i +k +e +w +o +u +t +r +l +v +y +m +s +f +ɗ +̃ +d +ŋ +g +p +c +ỹ +ɓ +j +b +ƴ +ñ +' +- +á +— + diff --git a/models/cpa/G_100000.pth b/models/cpa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..25f4cf2a4ec139e72ae680726e187f9d51e5e86d --- /dev/null +++ b/models/cpa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066cb8b271261911a2e926ef2598e9c7e0b07d51e7ac7f734f6947ad2dd25631 +size 145487609 diff --git a/models/cpa/config.json b/models/cpa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cpa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cpa/vocab.txt b/models/cpa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..54e81302531346da54e5e333c7d2e5e2adc5ddf7 --- /dev/null +++ b/models/cpa/vocab.txt @@ -0,0 +1,42 @@ +3 +ø +́ +q +1 +ó +f +_ +b +x +l +t +u +i +n +g +ǿ +ŋ +r +k +z +ë +s +ñ +í +ú +m +j + +c +ü +2 +é +á +y +o +v +d +p +h +a +e diff --git a/models/cpb/G_100000.pth b/models/cpb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..410a914fb94ce9359cc23144a1fa305650831e92 --- /dev/null +++ b/models/cpb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8c62ac46a7d58f27e69aa004fbf3552cbe377154efa03c5da79f46fb47b0477 +size 145490574 diff --git a/models/cpb/config.json b/models/cpb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cpb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cpb/vocab.txt b/models/cpb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4981388987c6f9ce8b2cdb37a6630e606aa1181 --- /dev/null +++ b/models/cpb/vocab.txt @@ -0,0 +1,46 @@ +_ +n +t +p +c +é +6 +s +ñ +8 + +b +í +x +h +9 +v +á +m +0 +3 +4 +z +u +q +' +w +ú +7 +5 +2 +i +l +o +f +y +g +r +j +d +a +1 +- +k +ó +e diff --git a/models/cpu/G_100000.pth b/models/cpu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..345415cf36c71d29e39fb94155e061310d115729 --- /dev/null +++ b/models/cpu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5381eaac38e8777b06fe6ace67e7e8a95e5937e2165930a63d8c9f39223baded +size 145489143 diff --git a/models/cpu/config.json b/models/cpu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cpu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cpu/vocab.txt b/models/cpu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c0fbb2e51f8d13d5c673ba2e25e2640cb1651872 --- /dev/null +++ b/models/cpu/vocab.txt @@ -0,0 +1,44 @@ +1 +g +4 +h +9 +e +2 +k +_ +x +- +r +á +ñ +m +7 +c + +n +b +0 +j +ú +5 +a +p +y +l +i +6 +v +8 +z +í +t +f +ó +3 +d +u +s +é +q +o diff --git a/models/crh/G_100000.pth b/models/crh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4eb272f6bd523494f728bbec0e0a603bbf3be9eb --- /dev/null +++ b/models/crh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fa9f633704330d54a847aa45e665af0dd381b4e5454d77c61b3910a0e1720b3 +size 145489147 diff --git a/models/crh/config.json b/models/crh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crh/vocab.txt b/models/crh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5e24f94ea558a2c53c08ae0397fdccfd871cfa7d --- /dev/null +++ b/models/crh/vocab.txt @@ -0,0 +1,44 @@ +а +6 +5 +е +ж +ы +4 +и +к +0 +р +ш +ч +д +б +з +ю +п +х +- +н +э + +ъ +й +ё +я +– +ц +м +7 +в +_ +л +о +1 +г +3 +у +ф +ь +т +2 +с diff --git a/models/crk-script_latin/G_100000.pth b/models/crk-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..98455f07165702876384e47d7b89fad23265706f --- /dev/null +++ b/models/crk-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a69d2117e81f94dab42ec61de98752fec72c797bcb5115a27d5a5133cd2c09 +size 145482999 diff --git a/models/crk-script_latin/config.json b/models/crk-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crk-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crk-script_latin/vocab.txt b/models/crk-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9036e1a75dde584019b9ed0bdff7795ca6d53cc6 --- /dev/null +++ b/models/crk-script_latin/vocab.txt @@ -0,0 +1,36 @@ +m +a +c +q +— +r +y +x +e +ē +_ + +l +o +n +d +p +' +ā +k +f +w +j +- +i +ō +h +b +g +s +ī +u +v +z +t +‐ diff --git a/models/crk-script_syllabics/G_100000.pth b/models/crk-script_syllabics/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..681a4d2d785f79f53d67b9b4b1c4544b7a26f189 --- /dev/null +++ b/models/crk-script_syllabics/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38233b18d7109e5fe31c3517d4e67bad278454ca898f8600fdaefa879922ec7f +size 145516009 diff --git a/models/crk-script_syllabics/config.json b/models/crk-script_syllabics/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crk-script_syllabics/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crk-script_syllabics/vocab.txt b/models/crk-script_syllabics/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..716e2952dece7eb53d12fa447f2375c955707f1d --- /dev/null +++ b/models/crk-script_syllabics/vocab.txt @@ -0,0 +1,79 @@ +| +ᐧ +ᑭ +ᐃ +ᐊ +ᐁ +ᓯ +ᑲ +ᑕ +ᐦ +ᓂ +ᐢ +ᒋ +ᒥ +ᐠ +ᓇ +ᑳ +ᑯ +ᔨ +ᐅ +ᒫ +ᔭ +ᒪ +ᐤ +ᐋ +ᐣ +ᐟ +ᑫ +ᑎ +ᕽ +ᐱ +ᑖ +ᑌ +ᑐ +ᔮ +ᐸ +ᐯ +ᓈ +ᓴ +ᒧ +ᓱ +ᔦ +ᓭ +ᒣ +ᐨ +ᓀ +ᓄ +ᕒ +ᐳ +ᑦ +ᓵ +ᓬ +ᒉ +ᔪ +ᒑ +ᐹ +ᐩ +ᑊ +ᒍ +ᒐ +ᐆ +ᑰ +ᑮ +h +ᒼ +ᒨ +ᓲ +ᑑ +ᓃ +ᓅ +ᓰ +ᒦ +ᐴ +ᒌ +ᐄ +t +9 +7 + diff --git a/models/crn/G_100000.pth b/models/crn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..165be77e6da0f317552622d96e6cc6efa5653cd1 --- /dev/null +++ b/models/crn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02a24c0ca6a72cedd4cc98e2f182b35de3b2e481553e30a38362ccb74674ee8 +size 145483747 diff --git a/models/crn/config.json b/models/crn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crn/vocab.txt b/models/crn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3ab197ca3d4ce02bc74be31a2138601bb8043977 --- /dev/null +++ b/models/crn/vocab.txt @@ -0,0 +1,37 @@ +a +| +' +t +u +e +i +ɨ +j +n +m +y +r +c +h +s +p +x +v +z +o +á +d +l +ú +é +í +q +b +f +g +́ +ó +ñ +k +à + diff --git a/models/crq/G_100000.pth b/models/crq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c19bcc864437979f644f3037a034ffbd31a65b6f --- /dev/null +++ b/models/crq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c39d20f60e60b97b1d4d1d86de79534b1b2eb22bd47ad560696ef58ce726c1 +size 145491421 diff --git a/models/crq/config.json b/models/crq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crq/vocab.txt b/models/crq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..55090c26dcbfc5259397513d36919cfa75061347 --- /dev/null +++ b/models/crq/vocab.txt @@ -0,0 +1,47 @@ +é +e +q +g +ó +v +p +- +t +l +z +á +f +x +o +4 +8 +2 +j +y +3 +w +0 +– +ñ +6 +' +b +u +í +5 +s +a +ú +d +_ + +9 +n +h +c +7 +1 +k +r +m +i diff --git a/models/crs/G_100000.pth b/models/crs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e52bb6d938894ed7a0fcc7ee8b2f98cfe20dd03 --- /dev/null +++ b/models/crs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56774e63dc6c0ca97dc15843aa58f6e223930fdcd8dcc8c5ef2c7f0e7c24c3e0 +size 145482990 diff --git a/models/crs/config.json b/models/crs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crs/vocab.txt b/models/crs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b24cc35103adca6da8b1829f2ecb82486752a199 --- /dev/null +++ b/models/crs/vocab.txt @@ -0,0 +1,36 @@ +9 +r + +z +n +l +5 +8 +d +t +2 +g +4 +y +m +h +b +s +0 +7 +w +' +f +a +p +k +v +e +o +1 +i +- +u +3 +6 +_ diff --git a/models/crt/G_100000.pth b/models/crt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ac4553d78d6af14486e1c5e4ceddcb97e1913de6 --- /dev/null +++ b/models/crt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79391263bff9fa866b11b45b84c048276e5e5edf734dcba8e4396a0d8dce6ac +size 145486062 diff --git a/models/crt/config.json b/models/crt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/crt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/crt/vocab.txt b/models/crt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7e05c8474d4ae3a755a278316b949cf02c2bae59 --- /dev/null +++ b/models/crt/vocab.txt @@ -0,0 +1,40 @@ +l +í +t +w +ú +- +c +r +q +9 +g +4 +m +s +á + +k +— +ñ +x +p +b +y +a +e +_ +o +' +f +ó +z +d +0 +j +h +u +v +n +é +i diff --git a/models/csk/G_100000.pth b/models/csk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a052b693ca25e9fd1dbffdae0c95617c772369a8 --- /dev/null +++ b/models/csk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3ccef64b04e60ecd9b7cd519ba87cabdfbb213f85cd439344033119693d642 +size 145481335 diff --git a/models/csk/config.json b/models/csk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/csk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/csk/vocab.txt b/models/csk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1869defd9a4dd9e25a7d3aff64ca2611446887f3 --- /dev/null +++ b/models/csk/vocab.txt @@ -0,0 +1,34 @@ +| +a +u +l +i +o +k +n +e +m +b +t +y +w +j +h +s +f +á +ú +ŋ +ñ +é +̥ +í +g +ó +ˈ +c +d +— +' +- + diff --git a/models/cso/G_100000.pth b/models/cso/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a2e441acf394eea7680d41ea8159993d71f5aa5f --- /dev/null +++ b/models/cso/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:addd68d6fcf62b1192538ee77754ca25a5f8e2d060f9bc6ce59206d793509fd3 +size 145485287 diff --git a/models/cso/config.json b/models/cso/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cso/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cso/vocab.txt b/models/cso/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..445e96c3767b5a797a0785eaa7df7e54b879fae5 --- /dev/null +++ b/models/cso/vocab.txt @@ -0,0 +1,39 @@ + +í +y +ë +g +e +f +d +é +a +v +k +ñ +j +m +o +h +ɨ +3 +́ +ó +2 +ú +z +c +t +_ +l +q +n +1 +x +b +i +s +u +p +r +á diff --git a/models/ctd/G_100000.pth b/models/ctd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c9b531a17ee0fbdadb060d33652b43223210589 --- /dev/null +++ b/models/ctd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b8b8c1117ee329ac4f258bec20a56325aa40c291ff9e272b2bd91736e59ba2 +size 145478373 diff --git a/models/ctd/config.json b/models/ctd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ctd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ctd/vocab.txt b/models/ctd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d3a50077babc1cb1aa588b8d9a35c5240521199f --- /dev/null +++ b/models/ctd/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +n +h +t +u +g +e +k +m +p +l +o +s +w +d +c +z +b +- +v +j +r +f +y +' +x +q + diff --git a/models/ctg/G_100000.pth b/models/ctg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4b01ffc37d3f21c81be6d5ef75b0b0661271715 --- /dev/null +++ b/models/ctg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bc20ea5e9e9755bb142214bc67e94822ef60aa77b2fecc80dbb0a346e4f12ba +size 145501565 diff --git a/models/ctg/config.json b/models/ctg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ctg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ctg/vocab.txt b/models/ctg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d7a890fce690aa1f8b7a723f36afd1050a542ca5 --- /dev/null +++ b/models/ctg/vocab.txt @@ -0,0 +1,60 @@ +ত +খ +ী +অ +_ +এ +ও +ঃ +ষ +— +ড +ূ +উ +গ +ে +ই + +ফ +ভ +' +ঞ +চ +ং +ঢ +ব +দ +হ +ন +য +– +ৈ +ু +থ +স +ঠ +ৌ +ট +জ +় +ি +ঐ +ল +ঝ +ঘ +ঊ +ঁ +আ +ো +া +ক +‍ +ছ +ম +প +শ +্ +ধ +- +ৃ +র diff --git a/models/cto/G_100000.pth b/models/cto/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1386a7cd910ca6c46b85b795b199756adbf1276c --- /dev/null +++ b/models/cto/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:104016be0690a0c3364741eedf1b3a5227816673e40e3c378bdf4795cb634153 +size 145487601 diff --git a/models/cto/config.json b/models/cto/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/cto/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cto/vocab.txt b/models/cto/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a9d8378040ef230403a171d2f469be8d7f968687 --- /dev/null +++ b/models/cto/vocab.txt @@ -0,0 +1,42 @@ +a +| +r +b +e +' +i +ã +ʉ +m +d +u +w +z +t +s +k +n +o +ũ +j +ẽ +h +ĩ +̃ +p +y +g +c +ú +õ +l +ñ +í +é +f +ó +á +v +- +q + diff --git a/models/ctu/G_100000.pth b/models/ctu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a02016bc4cbc7347d066f472ebb39574ecbb086d --- /dev/null +++ b/models/ctu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6098baad6e1fe8d0de53bb6425a1c7acae1c2fd96fcd13b48de3b6f83393236 +size 145483747 diff --git a/models/ctu/config.json b/models/ctu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ctu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ctu/vocab.txt b/models/ctu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9d2c49dc883de3a23d5093e00c682e788c4bdc3a --- /dev/null +++ b/models/ctu/vocab.txt @@ -0,0 +1,37 @@ +| +i +a +c +l +t +e +o +j +n +b +' +m +u +s +ʌ +h +y +ñ +p +w +q +x +r +d +ú +g +í +á +é +ó +f +z +v +ń +k + diff --git a/models/cuc/G_100000.pth b/models/cuc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..77957afda91a92f42ec4b304bc2fefe7a7c49384 --- /dev/null +++ b/models/cuc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffc3140a1b71b2dd2faaf6777d24c227c7a1f4e32f58dbe7fd025d6541943cf9 +size 145486071 diff --git a/models/cuc/config.json b/models/cuc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cuc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cuc/vocab.txt b/models/cuc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2f87826e4c4aed7b0b4716477ffa1534d1dbf386 --- /dev/null +++ b/models/cuc/vocab.txt @@ -0,0 +1,40 @@ +| +3 +i +a +4 +n +h +2 +e +j +o +u +l +c +t +1 +s +m +ˉ +q +g +5 +ú +r +y +d +p +ñ +ŋ +b +í +á +f +é +ó +v +z +x +k + diff --git a/models/cui/G_100000.pth b/models/cui/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ec07d383501a1293d5be1cd31bf7620d3861168 --- /dev/null +++ b/models/cui/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c360292160f8937730c903adbad9c1ddd57174a0b3cf65691c9875f708c8465a +size 145484549 diff --git a/models/cui/config.json b/models/cui/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cui/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cui/vocab.txt b/models/cui/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..275d4884f345cec465b4169b7b4f6eb40a1e15a4 --- /dev/null +++ b/models/cui/vocab.txt @@ -0,0 +1,38 @@ +a +é +ñ +e +i +u +z +í +j +r +w +o +' +n +s +— +f +y +t +c +ʉ + +d +l +ú +h +q +- +b +x +_ +ó +v +p +á +m +k +g diff --git a/models/cuk/G_100000.pth b/models/cuk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3fb0d30e96b7883dbfb5fa43283dfbf34ac75ee6 --- /dev/null +++ b/models/cuk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a903fe8c2971ecb44f80eaafe4548ed6847343e44ba79a19b22ba477446f35 +size 145482889 diff --git a/models/cuk/config.json b/models/cuk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cuk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cuk/vocab.txt b/models/cuk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9adf6cfdc442aa9e48ca9af719cbe590d8ec3b37 --- /dev/null +++ b/models/cuk/vocab.txt @@ -0,0 +1,36 @@ +a +| +e +d +i +g +n +b +m +- +u +r +s +o +l +k +w +y +t +j +c +p +ú +h +é +— +f +í +ó +v +á +z +q +x +' + diff --git a/models/cul/G_100000.pth b/models/cul/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4606aad72067221a457d5f286c0dc5948dff6b35 --- /dev/null +++ b/models/cul/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625065f09eda9b0f69723cf3b2f3fd23a4f9aa160a2a14e192780e53e16d5a53 +size 145486951 diff --git a/models/cul/config.json b/models/cul/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cul/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cul/vocab.txt b/models/cul/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..110d3dcd92bc40e6d64492a1391e9432156340ce --- /dev/null +++ b/models/cul/vocab.txt @@ -0,0 +1,41 @@ +' +ʼ +k +n +ñ +f +m +7 +l +o +g +— +1 +6 +9 +s +p +5 +_ +b +ó +i +- +2 +a +3 +j +e +z +v +r +c +4 +0 + +q +t +8 +h +u +d diff --git a/models/cwa/G_100000.pth b/models/cwa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c1d7c57b11abcca64de5dad67e0307497fd0f97 --- /dev/null +++ b/models/cwa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e581017ebc0d9e0653c2ad691da7e9983643ce6d64cbed4c846d0ae0d5e06957 +size 145476862 diff --git a/models/cwa/config.json b/models/cwa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cwa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cwa/vocab.txt b/models/cwa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..69a759c17dd70cd82523d1a20314ff16dc9db4c1 --- /dev/null +++ b/models/cwa/vocab.txt @@ -0,0 +1,28 @@ +n +s +k +u +t +b +f +v +i +d +p +_ +e +a +- +r +y +w +g +ú +m +j +c + +h +' +o +ʼ diff --git a/models/cwe/G_100000.pth b/models/cwe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9822e8bfebcf439b26d10a10bd0aa7a1faca60f --- /dev/null +++ b/models/cwe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa104c3581b0eb47b90eb6d5f55aedd946aac28f406a34e10ed6b075bff318a8 +size 145479161 diff --git a/models/cwe/config.json b/models/cwe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cwe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cwe/vocab.txt b/models/cwe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..34970450d5a0cd5c9a2072d1d93a3bdbaf437cd6 --- /dev/null +++ b/models/cwe/vocab.txt @@ -0,0 +1,31 @@ +2 +o + +f +s +a +j +u +- +h +3 +b +y +l +c +g +7 +e +' +m +v +k +0 +p +t +w +z +_ +i +n +d diff --git a/models/cwt/G_100000.pth b/models/cwt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cad205aa562c2b2430da15d66cfda8aa4330b639 --- /dev/null +++ b/models/cwt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f44b6aaf462bcbb6755e5003e4926423d2e6c25b8ff0ac808671f2bd997576 +size 145480695 diff --git a/models/cwt/config.json b/models/cwt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cwt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cwt/vocab.txt b/models/cwt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d2731a808e867301da4b2b1123869bb0cceef5d6 --- /dev/null +++ b/models/cwt/vocab.txt @@ -0,0 +1,33 @@ +| +a +e +i +n +u +k +o +t +b +r +m +h +y +s +' +ŋ +j +f +á +w +l +d +ñ +ú +í +é +- +g +̱ +p +ó + diff --git a/models/cya/G_100000.pth b/models/cya/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a163da6b394396485334d2816d8946de37d0f25 --- /dev/null +++ b/models/cya/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91172f4312b60f1c7830bcbf0303f29169513b7daa7ccc21ab7890f848e8f552 +size 145483785 diff --git a/models/cya/config.json b/models/cya/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cya/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cya/vocab.txt b/models/cya/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2fceebd3db72f89679a50cd9dbf339d17429322d --- /dev/null +++ b/models/cya/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +' +u +i +c +o +t +y +s +e +h +l +ñ +d +m +j +x +g +q +r +b +p +ú +— +í +é +f +v +ó +z +á +ü +k +- + diff --git a/models/cym/G_100000.pth b/models/cym/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2036996fdb997c92f3803cc5b560f53702f44dc --- /dev/null +++ b/models/cym/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce7b69e19a927353a756f72ab4aef281cf9bb18059287168b4b049b6f5f232c +size 145486052 diff --git a/models/cym/config.json b/models/cym/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/cym/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/cym/vocab.txt b/models/cym/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..3fc8f1ca78eaabcec0a5f28bdd036ef32a417e54 --- /dev/null +++ b/models/cym/vocab.txt @@ -0,0 +1,40 @@ +n +d +ŷ +î +x +— +h +o +_ +s +ó + +u +- +r +' +g +a +ŵ +f +y +m +â +e +b +j +z +p +û +i +ï +l +c +ô +ö +ë +ê +w +á +t diff --git a/models/daa/G_100000.pth b/models/daa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad8bddbb6354a0d15f2d6e32fce5b4c0b3e33dfe --- /dev/null +++ b/models/daa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:976b9f0b9784e406b4d5a7bc229a182cc25c648f35eebdb6fbb049e9077cb419 +size 145479149 diff --git a/models/daa/config.json b/models/daa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/daa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/daa/vocab.txt b/models/daa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2093ddcde86898c15649b68f2f56b845670d588d --- /dev/null +++ b/models/daa/vocab.txt @@ -0,0 +1,31 @@ +| +a +i +k +n +o +u +e +r +t +g +ŋ +y +m +s +b +l +d +ɗ +w +c +- +p +̰ +j +h +z +ɓ +ƴ +' + diff --git a/models/dah/G_100000.pth b/models/dah/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..026628e87173a89a61664ed4443ca740120a0d83 --- /dev/null +++ b/models/dah/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92f216c62be73430ea867cbec2113ce1e4680317921c742f41dc35c47903f53 +size 145482985 diff --git a/models/dah/config.json b/models/dah/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dah/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dah/vocab.txt b/models/dah/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c9f0573cbe5ffecfe775acb2bbefe1dcdb4cba60 --- /dev/null +++ b/models/dah/vocab.txt @@ -0,0 +1,36 @@ +e +h +– +g +w +k +5 +8 +- +4 +' +6 +t +1 +m +0 +f +i +2 +u +_ +s +b +7 +y +n +d +p + +a +3 +9 +ŋ +o +l +r diff --git a/models/dar/G_100000.pth b/models/dar/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..993283d18e8794e336db1c83d3772f9c8fabb0a3 --- /dev/null +++ b/models/dar/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ebe48e5f9f136b374bd6c97bb312abb274b1d0c1444c99ac3732216c59d0693 +size 145483093 diff --git a/models/dar/config.json b/models/dar/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dar/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dar/vocab.txt b/models/dar/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b282c617d92350064a1560c20590e15d1e0c6ee9 --- /dev/null +++ b/models/dar/vocab.txt @@ -0,0 +1,36 @@ +ь +п +_ +б +о +ш +– +ф +м +р +ж +а +' + +н +к +ю +я +- +ъ +х +й +ч +с +г +л +з +ӏ +у +т +э +и +ц +в +е +д diff --git a/models/dbj/G_100000.pth b/models/dbj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..27cf30225f80d7b88ca99867b484a5f2da6d0175 --- /dev/null +++ b/models/dbj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1506650d6c4dd0380a829a954c556336034d26163462cd4db37418f4339237be +size 145476067 diff --git a/models/dbj/config.json b/models/dbj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dbj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dbj/vocab.txt b/models/dbj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e55d26e1c365da616c56c7544ff8f3c593630353 --- /dev/null +++ b/models/dbj/vocab.txt @@ -0,0 +1,27 @@ +' +b +l +y +f +c +n +o +i +_ +s +g +z +d +k +m +j +- +a + +p +u +e +t +r +h +w diff --git a/models/dbq/G_100000.pth b/models/dbq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..de85882a4080e07e54318bed9db6e71f404cb039 --- /dev/null +++ b/models/dbq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf5e52a7211bd97cfab2b56732cea44a995b759868013a82408d5e845175355 +size 145480695 diff --git a/models/dbq/config.json b/models/dbq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dbq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dbq/vocab.txt b/models/dbq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4b482b32beb66b499cf357c25cb648bdafdb8e65 --- /dev/null +++ b/models/dbq/vocab.txt @@ -0,0 +1,33 @@ +k +é +á +_ +l +o +c +r +ɓ +ɗ +a +' +s +i +j +ú +z +g +ə +h +y + +m +n +e +v +p +b +t +d +f +w +u diff --git a/models/ddn/G_100000.pth b/models/ddn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a01472f661d830b1ae3766dcd7aa428aef573b0 --- /dev/null +++ b/models/ddn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9f6bcba829bb22d132262a06f9441ad87558b1353f5d78519de70cbd1a5ee1 +size 145482973 diff --git a/models/ddn/config.json b/models/ddn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ddn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ddn/vocab.txt b/models/ddn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c710a5ed3100dd8e6cbefff75c8b78941b126412 --- /dev/null +++ b/models/ddn/vocab.txt @@ -0,0 +1,36 @@ +g + +t +à +f +e +̀ +ɛ +ò +k +a +d +_ +z +m +w +c +h +ɑ +ŋ +ɔ +ã +n +u +̃ +b +p +l +j +á +i +r +y +o +s +ǹ diff --git a/models/ded/G_100000.pth b/models/ded/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbe603014a3653cfc894190fb95934345978a615 --- /dev/null +++ b/models/ded/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b535ef54aa9dc99eecbed5b026eaba7920f7c7c62dea370101b24e170dc7aeca +size 145482965 diff --git a/models/ded/config.json b/models/ded/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ded/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ded/vocab.txt b/models/ded/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cffabb035278daa4e921a9ab56414c1a5884145c --- /dev/null +++ b/models/ded/vocab.txt @@ -0,0 +1,36 @@ +k +8 +n +2 +5 +e +f +w +a +r +u +1 +_ +c +0 +h +b +3 +o +9 +i +g +d +' +6 +m +— +y +p +z +t +s +l +4 +7 + diff --git a/models/des/G_100000.pth b/models/des/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc83517e5d368e816656a41e138f610426ee3bba --- /dev/null +++ b/models/des/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97af4a8c2f20e54da95e7b33686b9d278634919cdeb5fe6b3dadfe90052aa7f +size 145490665 diff --git a/models/des/config.json b/models/des/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/des/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/des/vocab.txt b/models/des/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d02ba53f1e672c3d3dde1a913d864260b87d93bc --- /dev/null +++ b/models/des/vocab.txt @@ -0,0 +1,46 @@ +| +r +a +e +i +ʉ +o +m +g +ã +u +p +ĩ +h +s +c +b +j +d +t +y +w +n +á +ñ +̃ +õ +q +ẽ +l +ũ +ó +í +é +f +v +ü +z +1 +ú +- +k +x +‐ +̱ + diff --git a/models/deu/G_100000.pth b/models/deu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e0c25271c5987a54044623214cebcc70689e83 --- /dev/null +++ b/models/deu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf70a8d4b18a87bd237e1d8e5789f5dc3181d8cf577683cf0bacace87d44ffaa +size 145489905 diff --git a/models/deu/config.json b/models/deu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/deu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/deu/vocab.txt b/models/deu/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..fa431b6edba85e373134936bde1085c8a8305a77 --- /dev/null +++ b/models/deu/vocab.txt @@ -0,0 +1,45 @@ + +v +2 +q +g +- +f +1 +8 +a +h +4 +ö +3 +r +m +ä +l +n +t +ë +d +b +y +ß +o +u +_ +j +s +6 +5 +ï +c +i +ü +p +k +e +– +w +z +7 +x +0 diff --git a/models/dga/G_100000.pth b/models/dga/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ce9b2be20a67b8a3a9e817f2631ca1bc3af125e --- /dev/null +++ b/models/dga/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522e6182d205887426a589bd75f234e3572f11695ea5ed359d4df0957f51cc68 +size 145482331 diff --git a/models/dga/config.json b/models/dga/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dga/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dga/vocab.txt b/models/dga/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..964806cc9e691c46196b4830e4d50794bee8ae05 --- /dev/null +++ b/models/dga/vocab.txt @@ -0,0 +1,35 @@ +f +w +t +õ +ŋ +l +ɛ +' +g +o +v +k +d +n +ã +u +̃ +i +ɔ +e +z +h +p +y +ẽ +ĩ +b +_ +- +s +m +ũ + +r +a diff --git a/models/dgi/G_100000.pth b/models/dgi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8688b32ae3c2df08a3ed78553ee530a6d32da03c --- /dev/null +++ b/models/dgi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d70a41f3587d404e413ca661cfc6e048bb7ce5bc088ca1748759b152b0895d +size 145496055 diff --git a/models/dgi/config.json b/models/dgi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dgi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dgi/vocab.txt b/models/dgi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e0117e7f5893989b60b52ef1f8209f3a714abeaa --- /dev/null +++ b/models/dgi/vocab.txt @@ -0,0 +1,53 @@ +| +a +ɩ +n +ɛ +b +ʋ +r +t +l +e +w +m +y +ɔ +k +̃ +- +i +s +z +ã +p +u +o +d +ŋ +ƴ +' +f +c +́ +v +̀ +g +õ +ẽ +ũ +á +ú +ù +à +í +ɓ +h +j +é +ĩ +ì +ó +ò +è + diff --git a/models/dgk/G_100000.pth b/models/dgk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..44d348e6fa3e94b88e54e913cb3d467184654ee1 --- /dev/null +++ b/models/dgk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0c95640675f1c679d756267c99c5396071042c0ec89ae67b5d903ab4410eee +size 145490693 diff --git a/models/dgk/config.json b/models/dgk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dgk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dgk/vocab.txt b/models/dgk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0895e12a58f9797444196a0e21f2ee8e6f271dbe --- /dev/null +++ b/models/dgk/vocab.txt @@ -0,0 +1,46 @@ +é +a +ә +u +ù +ḭ +g +r +d +̰ +t +j +m +p +h +ò +ì +í +ɔ +n +l +á +_ +ɓ +w +ú +ó +ɗ +' +́ + +ə +i +2 +k +s +- +ṵ +à +o +̀ +ŋ +y +b +ɛ +e diff --git a/models/dgo/G_100000.pth b/models/dgo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e340290c38ce16b85b28dcacf2ad50cdfc09df6 --- /dev/null +++ b/models/dgo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53083e1cca399111e6de5633aba0077993a4a1fe2ec9eac01ba15026a7eef7c2 +size 145504485 diff --git a/models/dgo/config.json b/models/dgo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dgo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dgo/vocab.txt b/models/dgo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f7f258aec288dbd80011af3b44e30f1a011cd3b0 --- /dev/null +++ b/models/dgo/vocab.txt @@ -0,0 +1,64 @@ +| +े +ा +न +र +क +द +ी +त +स +ं +् +ल +प +म +ि +य +ग +ज +ै +ु +ब +च +ओ +आ +ो +ई +ड +ख +उ +़ +ऐ +अ +ए +श +व +ह +थ +ू +ट +ऊ +छ +ँ +फ +ौ +इ +भ +ध +ठ +- +झ +ढ +ष +ण +औ +घ +ञ +ृ +ः +' +ॉ +‍ +4 + diff --git a/models/dgr/G_100000.pth b/models/dgr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea5bed09ec866c7c60491b8ae26a842afb5cab75 --- /dev/null +++ b/models/dgr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfa1f663eb603d71afdea99a7d1bdf4340796a55b5acf2ff87807c4d45882eb7 +size 145489131 diff --git a/models/dgr/config.json b/models/dgr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dgr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dgr/vocab.txt b/models/dgr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2100844c53b1282d9b87c592ef87cf286e8ad670 --- /dev/null +++ b/models/dgr/vocab.txt @@ -0,0 +1,44 @@ +g +- +q +v + +f +e +y +d +ì +ʔ +s +ɂ +ǫ +z +r +a +' +ę +w +o +ò +x +̨ +m +b +l +ą +h +p +n +į +k +è +c +u +ı +ł +̀ +_ +à +t +i +j diff --git a/models/dhi/G_100000.pth b/models/dhi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..72c88fe0bf5ca86446cf26c8952cc36c8f81a03a --- /dev/null +++ b/models/dhi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f0d106367467740300b1b9cad107bf775cf55ef1a18a75c376bab7466ab94d +size 145498355 diff --git a/models/dhi/config.json b/models/dhi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dhi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dhi/vocab.txt b/models/dhi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5b536657890e4fb3be1a964a63fd91142f1b1fed --- /dev/null +++ b/models/dhi/vocab.txt @@ -0,0 +1,56 @@ +ा +| +ि +क +े +् +ल +ो +ङ +ह +इ +स +न +र +त +प +ु +म +द +ब +य +ग +उ +व +ख +ज +अ +भ +आ +च +थ +ध +घ +‍ +ए +फ +ओ +ट +झ +- +ड +छ +ै +ठ +ँ +ढ +ौ +ृ +' +औ +ऐ +ण +श +़ +ं + diff --git a/models/did/G_100000.pth b/models/did/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f12d3e7af1225b0dc202a2e848b1e3093a4aa6c --- /dev/null +++ b/models/did/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757132f99195bcfeba70d7d8382439b9847570bbef7a2b901891b97d4b85a599 +size 145482191 diff --git a/models/did/config.json b/models/did/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/did/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/did/vocab.txt b/models/did/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bc193506e9cdfce726100fe6c5a9e6b8d6fc12e4 --- /dev/null +++ b/models/did/vocab.txt @@ -0,0 +1,35 @@ +i +v +- +ĩ +a + +ũ +d +ꞌ +c +1 +õ +e +k +s +6 +r +w +m +g +t +b +o +u +_ +ẽ +h +á +n +p +j +' +l +y +ú diff --git a/models/dig/G_100000.pth b/models/dig/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4ba659eeaea3b1a3cbe0d44a60b5248242d74ef4 --- /dev/null +++ b/models/dig/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6fd34d3cbd23f95773258f9c34dbbd299759b0dc05554d78e8c3f3b0e129141 +size 145479929 diff --git a/models/dig/config.json b/models/dig/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dig/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dig/vocab.txt b/models/dig/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4ba14c0bbd0e4bc71ab04d58cd3d365f209b5321 --- /dev/null +++ b/models/dig/vocab.txt @@ -0,0 +1,32 @@ +c +' +l +a +v +á +o +r +u +k +m +5 +d +ó +n +0 +j + +p +f +y +s +i +e +t +b +w +_ +g +- +z +h diff --git a/models/dik/G_100000.pth b/models/dik/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1608f5dec7ac3f1cad0dd7100060c3684021752f --- /dev/null +++ b/models/dik/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cd4e205aadac5c4ec336da1ad7fa291d08bc3e51a0526ee770d34e1211126ba +size 145480787 diff --git a/models/dik/config.json b/models/dik/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dik/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dik/vocab.txt b/models/dik/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..097c0c8fbc79c4a14a4f9b2334048b4a506f0ce9 --- /dev/null +++ b/models/dik/vocab.txt @@ -0,0 +1,33 @@ +| +k +a +n +i +c +e +u +t +ë +l +ï +ɔ +h +y +̈ +r +ɛ +o +b +m +ä +w +p +d +ŋ +ö +g +j +ɣ +' +- + diff --git a/models/dip/G_100000.pth b/models/dip/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca83512da13062ddc21a01ea12a7648d425bad2e --- /dev/null +++ b/models/dip/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8207ce4a9c9283284ded53a6e4f300ad4cb4cb9210350d2c00cf230e6dbe38c +size 145477645 diff --git a/models/dip/config.json b/models/dip/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dip/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dip/vocab.txt b/models/dip/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d596f19866f03f7d20a704e8dd5aa6e463f36840 --- /dev/null +++ b/models/dip/vocab.txt @@ -0,0 +1,29 @@ +| +e +i +k +a +n +u +t +c +ɔ +o +y +l +r +h +d +ɛ +m +b +w +p +ŋ +g +j +ɣ +ԑ +— +ı + diff --git a/models/div/G_100000.pth b/models/div/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ce2ccb9e0964dd953722768a9361a6bd90b1bd5a --- /dev/null +++ b/models/div/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a23231c6cfa2ff64feb828b651e45b5888d2361a1d21f35471419280e28cb16 +size 145496051 diff --git a/models/div/config.json b/models/div/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/div/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/div/vocab.txt b/models/div/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc4ceb10967e5d2573de2c8f950f1f7d9ec9116a --- /dev/null +++ b/models/div/vocab.txt @@ -0,0 +1,53 @@ +| +ަ +ެ +އ +ް +ނ +ު +ި +ާ +ކ +ވ +ގ +މ +ރ +ހ +ީ +ތ +ދ +ފ +ބ +ސ +ލ +ށ +ޭ +ޮ +ޅ +ޔ +ޑ +ޫ +ޖ +ޙ +ޕ +ޯ +ޓ +ޤ +ޒ +ޢ +ޏ +ޗ +ޚ +ޝ +ޞ +ޣ +' +ޟ +ل +ޡ +ا +ه +ޛ +ޠ +ޘ + diff --git a/models/djk/G_100000.pth b/models/djk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1001ed409e43ebc4c6d796dbf6acfc11fd60a64a --- /dev/null +++ b/models/djk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1912da76c8bba46b9728efed63546df787ad5a5dae685d212ac523df2b65a088 +size 145478395 diff --git a/models/djk/config.json b/models/djk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/djk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/djk/vocab.txt b/models/djk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d25be8558784fe9a326c68e8b0b83924997aa350 --- /dev/null +++ b/models/djk/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +e +n +u +o +s +d +k +t +m +f +b +g +w +l +p +y +á +j +h +v +z +0 +1 +2 +- +r + diff --git a/models/dnj-dialect_blowowest/G_100000.pth b/models/dnj-dialect_blowowest/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a178850a3c268a272ff20da521c5d7b329e2b85 --- /dev/null +++ b/models/dnj-dialect_blowowest/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0491fa7c3d39320fa69ea3d4957621ac015ad2f94495333f24795d4ab1de7352 +size 145479915 diff --git a/models/dnj-dialect_blowowest/config.json b/models/dnj-dialect_blowowest/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dnj-dialect_blowowest/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dnj-dialect_blowowest/vocab.txt b/models/dnj-dialect_blowowest/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24a66b64914eb67863583c26b30d4a1807f296e7 --- /dev/null +++ b/models/dnj-dialect_blowowest/vocab.txt @@ -0,0 +1,32 @@ +| +‐ +a +' +ö +n +h +k +ë +d +ɛ +b +u +o +y +w +g +꞊ +ɔ +i +ü +l +p +m +s +z +t +e +f +v +r + diff --git a/models/dnj-dialect_gweetaawueast/G_100000.pth b/models/dnj-dialect_gweetaawueast/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..432d22aeee4cf2bf5a21b35e08722d9eecb53460 --- /dev/null +++ b/models/dnj-dialect_gweetaawueast/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10eaa94342847ea4562c1d67a79697feafaa45f1b6e75745b7ff67c5fd873ea7 +size 145482845 diff --git a/models/dnj-dialect_gweetaawueast/config.json b/models/dnj-dialect_gweetaawueast/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dnj-dialect_gweetaawueast/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dnj-dialect_gweetaawueast/vocab.txt b/models/dnj-dialect_gweetaawueast/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7be4199ff12b301ee61d8eefaa0c38c0d5ae28ea --- /dev/null +++ b/models/dnj-dialect_gweetaawueast/vocab.txt @@ -0,0 +1,36 @@ +| +‐ +a +' +h +n +ö +d +k +ɛ +b +y +g +ë +ɔ +w +o +i +꞊ +u +ü +m +s +p +t +ʋ +z +̈ +e +l +ɩ +f +r +v +1 + diff --git a/models/dnt/G_100000.pth b/models/dnt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..678cd044189b7b317f4dd005bb149ae61bc4cf4e --- /dev/null +++ b/models/dnt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c375d521b1998d7b50df172a7012f2180a31d7b24f252ed3c5ef9a9765701146 +size 145484531 diff --git a/models/dnt/config.json b/models/dnt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dnt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dnt/vocab.txt b/models/dnt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7bca5cf47b072e70a79a857700e073068582f6e4 --- /dev/null +++ b/models/dnt/vocab.txt @@ -0,0 +1,38 @@ +| +a +e +i +k +o +n +h +g +t +l +u +m +s +r +b +w +d +y +p +' +- +f +0 +1 +7 +z +2 +4 +3 +j +5 +6 +9 +c +8 +v + diff --git a/models/dnw/G_100000.pth b/models/dnw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..55f94593617fac2e5e494a1c3f33adc6965dc3e8 --- /dev/null +++ b/models/dnw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a34fd6e93e35129dfaa608414c3dcff22048b055a030ee76be8df1b807314c6e +size 145482231 diff --git a/models/dnw/config.json b/models/dnw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dnw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dnw/vocab.txt b/models/dnw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..54186845841745f9bfed4151ceec00a635985e57 --- /dev/null +++ b/models/dnw/vocab.txt @@ -0,0 +1,35 @@ +c +p +2 +0 +b +7 +l +6 +r +e +' +n +a +d +t +h +o +3 +9 +w + +_ +- +i +u +g +s +k +j +8 +4 +y +m +1 +5 diff --git a/models/dop/G_100000.pth b/models/dop/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9592babae7acc68ecebfb32bc35cae68bab1404c --- /dev/null +++ b/models/dop/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b320517f5ec87eb8e2b1c82639645dddcfa9f7673c41ec2b09adc92679a09389 +size 145482237 diff --git a/models/dop/config.json b/models/dop/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dop/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dop/vocab.txt b/models/dop/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fb57ecd148e2d2d845c4ca821e84f171b02ea364 --- /dev/null +++ b/models/dop/vocab.txt @@ -0,0 +1,35 @@ +| +a +t +ɩ +n +ɔ +ɛ +p +l +ʋ +ə +s +k +m +ɣ +i +́ +y +e +u +w +ŋ +á +- +o +h +c +f +ḿ +ń +ú +é +í +3 + diff --git a/models/dos/G_100000.pth b/models/dos/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4feaed7ea771e7318c58828121baf4adb7004655 --- /dev/null +++ b/models/dos/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cd8e02b67a90edafc21b20e0ad2e23eeb8abfaad117c12edab9a7f3d2b92034 +size 145489149 diff --git a/models/dos/config.json b/models/dos/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dos/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dos/vocab.txt b/models/dos/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..83182645ba658ba7dc04fc0b4b62eb9685f5851a --- /dev/null +++ b/models/dos/vocab.txt @@ -0,0 +1,44 @@ +o +ɓ +v +w +ɛ +h +u +0 +à +1 +á +n + +́ +í +p +ĩ +s +ɩ +r +ʋ +k +i +ũ +ŋ +y +ɔ +ã +c +ʔ +ɗ +z +̀ +̃ +_ +m +e +a +b +d +f +g +l +t diff --git a/models/dsh/G_100000.pth b/models/dsh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f94e076cbd834bb5aece923339ad3a2572fe8e97 --- /dev/null +++ b/models/dsh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5e714dca0f48b471e3a921a484898877d42f55e88b3342aad04ae83177aa2c +size 145486065 diff --git a/models/dsh/config.json b/models/dsh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dsh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dsh/vocab.txt b/models/dsh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..96bea12f4fa2d0a2b40aace6180b6007e533057c --- /dev/null +++ b/models/dsh/vocab.txt @@ -0,0 +1,40 @@ +k + +̶ +ó +ô +j +- +b +u +ʼ +_ +t +ˈ +o +r +w +â +l +đ +a +é +m +ê +v +' +î +h +f +á +ú +n +e +i +c +í +û +d +g +s +y diff --git a/models/dso/G_100000.pth b/models/dso/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..14df247d7d265b52a062a5670a091889e32f4e2c --- /dev/null +++ b/models/dso/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2b0ba5e51d02428e84a9f456430d322ad1286df308ebe45e501a193263aa00 +size 145480681 diff --git a/models/dso/config.json b/models/dso/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dso/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dso/vocab.txt b/models/dso/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..94513f03b6f4912aff4f02c7633beccd44d0ed3b --- /dev/null +++ b/models/dso/vocab.txt @@ -0,0 +1,33 @@ +୍ +ଡ +ପ +ଙ +ର +ଇ +ଅ +_ +ବ +ଲ +ଚ +ଁ +ଉ +ସ +ା +ନ +ଦ +ୟ +ଣ +ଗ +େ +ଆ + +ଜ +ଟ +‍ +ମ +ୁ +ି +ଏ +ଞ +କ +ତ diff --git a/models/dtp/G_100000.pth b/models/dtp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..46c28823784a7ec3414511d61b335274edf941f0 --- /dev/null +++ b/models/dtp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7380a3b72812b135bedfe9b1198a986016981a1427e73b1838adaae77c9ff301 +size 145479941 diff --git a/models/dtp/config.json b/models/dtp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dtp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dtp/vocab.txt b/models/dtp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..73443258e9e61f0ebc077e5409e03e78ee4fd6e4 --- /dev/null +++ b/models/dtp/vocab.txt @@ -0,0 +1,32 @@ +| +o +i +a +n +d +u +t +k +m +' +s +g +p +r +l +y +h +b +w +- +e +j +— +0 +4 +c +1 +6 +2 +3 + diff --git a/models/dts/G_100000.pth b/models/dts/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..07049cd1e80b1d0f832dccd9ae827ae020c5cced --- /dev/null +++ b/models/dts/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca9d7d8ff4363731aa96aa5124bc6036f2655af21cea8e6b2a6a15c0c6e3203 +size 145476872 diff --git a/models/dts/config.json b/models/dts/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dts/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dts/vocab.txt b/models/dts/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86f2e9cb2f9554afab02bb76d1be4ac8631849d1 --- /dev/null +++ b/models/dts/vocab.txt @@ -0,0 +1,28 @@ +| +a +i +ɛ +n +u +ɔ +e +g +b +w +o +m +r +l +y +k +s +d +j +t +p +ŋ +ɲ +h +' +- + diff --git a/models/dug/G_100000.pth b/models/dug/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6739514f78a5c110f2db41d6d02afbb77d32864f --- /dev/null +++ b/models/dug/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d3df592df5310d8ba1326e670933b376be4ef0b181dfdb53244812b8cd9032 +size 145478385 diff --git a/models/dug/config.json b/models/dug/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dug/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dug/vocab.txt b/models/dug/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1ae381fb00735b7d6bc5963811f636c8418f0608 --- /dev/null +++ b/models/dug/vocab.txt @@ -0,0 +1,30 @@ +a +| +i +u +n +m +e +o +k +h +w +l +r +y +d +g +t +s +z +' +c +b +p +v +j +á +f +- +6 + diff --git a/models/dwr/G_100000.pth b/models/dwr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff43d44f9389220e99a16cd686ed3304b424e333 --- /dev/null +++ b/models/dwr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:543b8356ab013e5fb28cf3886fde718243e23f6b398e4a2cf52be325da581365 +size 145478383 diff --git a/models/dwr/config.json b/models/dwr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dwr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dwr/vocab.txt b/models/dwr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..648c87cc2d13018c4afd536789d072ebd5686f9f --- /dev/null +++ b/models/dwr/vocab.txt @@ -0,0 +1,30 @@ +a +| +e +i +n +t +d +o +s +y +u +h +w +k +g +m +p +b +l +r +q +7 +x +c +z +f +' +j +- + diff --git a/models/dyi/G_100000.pth b/models/dyi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6cfb0436094a0dca21fe3af16cc390b4e4cb48e9 --- /dev/null +++ b/models/dyi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0c0c03d11f3662813713f0a5225440652ff208d6eac2cb7cf75013c4626ac4 +size 145483017 diff --git a/models/dyi/config.json b/models/dyi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dyi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dyi/vocab.txt b/models/dyi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..637f829ba7f41a1aa7732954ace9a273f3e2b532 --- /dev/null +++ b/models/dyi/vocab.txt @@ -0,0 +1,36 @@ +| +a +n +i +e +ɛ +y +l +w +g +k +o +p +ɔ +r +m +u +t +s +ŋ +b +f +j +ì +z +h +d +è +à +c +' +v +ù +ò +̀ + diff --git a/models/dyo/G_100000.pth b/models/dyo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a59bd5b4305e541953e490b3d2f22d62d780b2c --- /dev/null +++ b/models/dyo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f032fd0408b6b808e36433c4341bd29b50faeb928284fff51c233a036da9d14 +size 145481483 diff --git a/models/dyo/config.json b/models/dyo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dyo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dyo/vocab.txt b/models/dyo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..59771e35fd0339238ad78e9327d2364f81c35e7f --- /dev/null +++ b/models/dyo/vocab.txt @@ -0,0 +1,34 @@ +a +| +i +u +n +m +e +k +o +t +b +l +j +r +d +y +s +f +á +p +ú +w +ŋ +g +ñ +c +í +' +é +h +ó +- +— + diff --git a/models/dyu/G_100000.pth b/models/dyu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..97c97d11b4d3598532b5be467ad6e2385b45739d --- /dev/null +++ b/models/dyu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:628b519cd87bb26322949d0b18dd4839863a96f15d881553018070f45df13e98 +size 145479929 diff --git a/models/dyu/config.json b/models/dyu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dyu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dyu/vocab.txt b/models/dyu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..127dfe0490c8b49b308c280beb7600fbe0076c07 --- /dev/null +++ b/models/dyu/vocab.txt @@ -0,0 +1,32 @@ +t +y +b +u +j +ɲ +p +o +s +w +m +h +i +_ +' +l +f + +- +g +n +z +ɛ +a +k +ɔ +v +d +r +ŋ +e +c diff --git a/models/dzo/G_100000.pth b/models/dzo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e34490180d0048167b532757f8a739aac008bfed --- /dev/null +++ b/models/dzo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed37d9826a6d7cdf78d962bd53fc9ee5b2c79d04bb6a93284cdca0d90e44a88b +size 145499911 diff --git a/models/dzo/config.json b/models/dzo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/dzo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/dzo/vocab.txt b/models/dzo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aaae0c25e048873ed18115d5cf1e78c517a37aad --- /dev/null +++ b/models/dzo/vocab.txt @@ -0,0 +1,58 @@ +་ +ི +ས +ག +ད +ོ +ུ +ེ +བ +ན +ང +མ +ལ +ར +འ +ྱ +ཁ +པ +ཅ +ཀ +ཡ +ཚ +ཟ +ྲ +ཨ +ཏ +ཆ +ཤ +ྦ +ྟ +ཐ +ཞ +ྐ +ཕ +ཱ +ཝ +ྒ +ླ +ྡ +ཉ +ྨ +ཙ +ཛ +ཧ +ྤ +ྩ +ྷ +ཇ +ྗ +ྔ +ྣ +ྙ +ྫ +ྕ +ྭ +ཌ +ཪ + diff --git a/models/eip/G_100000.pth b/models/eip/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8630244ddff082757fe04d48672737e9ea94c185 --- /dev/null +++ b/models/eip/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deac34eb1f251c1d447a83ee7409444988ef273341bf7839074d9a7c95e52620 +size 145484639 diff --git a/models/eip/config.json b/models/eip/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/eip/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/eip/vocab.txt b/models/eip/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..495a497789dccfc04976bb3903787daf0a3ca7fb --- /dev/null +++ b/models/eip/vocab.txt @@ -0,0 +1,38 @@ +x +c +g +n +j +f + +s +6 +w +h +5 +9 +r +2 +' +o +d +8 +0 +i +k +1 +a +3 +p +l +m +z +y +b +- +_ +t +4 +u +e +7 diff --git a/models/eka/G_100000.pth b/models/eka/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3707183ae57c398747f0a4cff145f407079ad4e --- /dev/null +++ b/models/eka/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68264045910d091c6c6eede33df17d2f2bc176af2fa94aefd6b01ee3042e243 +size 145477634 diff --git a/models/eka/config.json b/models/eka/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/eka/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/eka/vocab.txt b/models/eka/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5ac04106fa85272d568c7a8ca9c8ab53db5cb1c3 --- /dev/null +++ b/models/eka/vocab.txt @@ -0,0 +1,29 @@ +f +o +s +n +ḿ +m +p +i +d +ń +e +w +- +v +j +u +' +k +y +g +r +á +b +h +l +t +a +_ + diff --git a/models/ell/G_100000.pth b/models/ell/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..73d239b5150ab7ae018c24b23ff5ee546082ba62 --- /dev/null +++ b/models/ell/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75bfa237f0fe859b34c4340bc7dccd944678cf9984bce5b5a82e2c90ca268db8 +size 145504497 diff --git a/models/ell/config.json b/models/ell/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ell/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ell/vocab.txt b/models/ell/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..f7b8fe0a3ec8a7c8066ced4de9be87bd6b94a24e --- /dev/null +++ b/models/ell/vocab.txt @@ -0,0 +1,64 @@ +b +1 +ή +p +5 +θ +ί +i +x +δ +ά +λ +7 +- +φ +π +ω +' +ϊ +k +ό +ψ +n +t + +y +τ +a +4 +β +â +κ +9 +γ +ξ +8 +χ +ε +h +ν +ζ +z +η +m +o +2 +6 +3 +e +υ +ώ +ϋ +έ +σ +ι +μ +α +ο +_ +ύ +0 +ς +ρ +ΐ diff --git a/models/emp/G_100000.pth b/models/emp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a2909f72d9ab11c464ea22426d488d5bd49f3c0 --- /dev/null +++ b/models/emp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:247dd6af077e87c9f5c0bba90237599eed198912ba04bb8d5a8e842522bd2121 +size 145499227 diff --git a/models/emp/config.json b/models/emp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/emp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/emp/vocab.txt b/models/emp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..504f65560422f94b7886be2b005c75b84e1e0bf3 --- /dev/null +++ b/models/emp/vocab.txt @@ -0,0 +1,57 @@ +a +| +r +i +b +e +â +d +j +ö +ô +u +c +m +n +o +õ +s +å +w +ó +p +é +t +ì +q +ï +y +ã +z +g +h +l +– +è +ò +v +ä +á +ð +ú +ë +4 +9 +0 +2 +1 +6 +5 +x +ñ +3 +8 +7 +k +- + diff --git a/models/enb/G_100000.pth b/models/enb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd219476e0d8917c83dd9ee31bffe0a1d337059b --- /dev/null +++ b/models/enb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67830a6943dbb27fd9f049668b07874360f676bede8eef8e728a51108789ae84 +size 145479045 diff --git a/models/enb/config.json b/models/enb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/enb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/enb/vocab.txt b/models/enb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b704d654cb7461d446682bc3129255a846956d30 --- /dev/null +++ b/models/enb/vocab.txt @@ -0,0 +1,31 @@ +| +i +k +o +ē +a +n +u +y +ō +t +r +c +h +p +e +m +ā +l +w +s +g +ʼ +- +' +0 +1 +2 +6 +4 + diff --git a/models/enx/G_100000.pth b/models/enx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c2a21a3b9b2d3311213452a13bea41677e91b8d --- /dev/null +++ b/models/enx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed353e7371067693c3ee3a448a915aacd1a86ab94cf6cdbc5dd0d7eb28f03c6e +size 145485439 diff --git a/models/enx/config.json b/models/enx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/enx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/enx/vocab.txt b/models/enx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0b96990aad72841f10671537e7802e3ca8402661 --- /dev/null +++ b/models/enx/vocab.txt @@ -0,0 +1,39 @@ +ú +é +ẽ +— +w +y +à +a +m +z +v +n +í +i +l +ñ +u +g +r +' +f +h +e +o +t +b +q +x +p +è +s +á +d +j + +k +c +_ +ó diff --git a/models/ese/G_100000.pth b/models/ese/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..144d210080a7705378fe29b1c5c8888a47a7464c --- /dev/null +++ b/models/ese/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5869fc9e9e53d741c08c075e90431b755b09a5e019e334e24f2725235f13f8d +size 145490685 diff --git a/models/ese/config.json b/models/ese/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ese/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ese/vocab.txt b/models/ese/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..09ea7ff9265bb5978050d8be5cd15cf58757b860 --- /dev/null +++ b/models/ese/vocab.txt @@ -0,0 +1,46 @@ +a +| +j +e +i +o +u +n +c +m +y +h +p +q +s +b +' +t +ñ +- +d +r +l +— +g +í +é +v +f +0 +á +z +ó +1 +2 +4 +5 +ú +x +6 +3 +9 +7 +k +8 + diff --git a/models/ess/G_100000.pth b/models/ess/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12cb5ccf88eaa5fca03214dfc6e7e04afa7ee4a3 --- /dev/null +++ b/models/ess/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a746da2e40313721ff547c1e21abb410509e8c3e03855be75524781027f597b9 +size 145488337 diff --git a/models/ess/config.json b/models/ess/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ess/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ess/vocab.txt b/models/ess/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d13f89dd70c6b9f9764d3143873ad028bc068cfd --- /dev/null +++ b/models/ess/vocab.txt @@ -0,0 +1,43 @@ + +c +1 +s +0 +f +8 +v +2 +x +_ +w +g +m +l +3 +‐ +9 +q +i +e +u +ʼ +j +d +b +r +y +n +z +5 +' +- +6 +ꞌ +7 +k +a +4 +p +h +o +t diff --git a/models/eus/G_100000.pth b/models/eus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..72f755f1ed7a5ec5472b8c1c26d682797c7a3ba3 --- /dev/null +++ b/models/eus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a64cce0626fc9e9ebe76d8df258054515931ad5d538e56ff194a5dc8aa73774 +size 145475313 diff --git a/models/eus/config.json b/models/eus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/eus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/eus/vocab.txt b/models/eus/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..74fb1bb1fb18c25ca1cf73b0c9ba3b17b86be79f --- /dev/null +++ b/models/eus/vocab.txt @@ -0,0 +1,26 @@ +b +— +o + +s +u +p +_ +f +r +x +a +h +t +l +g +e +m +v +k +j +n +d +i +z +- diff --git a/models/evn/G_100000.pth b/models/evn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fae43902e5e0a4c9c4ddd665110cc642d6464ab2 --- /dev/null +++ b/models/evn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8062da0b10a59495e0c43d6a3d3373df48ac7a1ab6c221fdac3526ae4cac8e +size 145484544 diff --git a/models/evn/config.json b/models/evn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/evn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/evn/vocab.txt b/models/evn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86ee78238513b1435180503fbdf198314e30b97a --- /dev/null +++ b/models/evn/vocab.txt @@ -0,0 +1,38 @@ +| +н +э +а +у +д +л +и +т +к +р +ӣ +в +м +с +ӯ +ч +г +ӈ +б +е +ы +х +о +- +я +– +п +ё +ю +й +ф +ь +ъ +ц +ш +з + diff --git a/models/ewe/G_100000.pth b/models/ewe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..576ca7b49bd28066bf1761fc4f3d1f84c7218e89 --- /dev/null +++ b/models/ewe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1fd0ae53a553c65a0770ce79cd97aaf46256540c19a4d1edf67e80f9da9b1f0 +size 145494609 diff --git a/models/ewe/config.json b/models/ewe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ewe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ewe/vocab.txt b/models/ewe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..853565eb9ef5cc5ab390edcaa2454779fca24151 --- /dev/null +++ b/models/ewe/vocab.txt @@ -0,0 +1,51 @@ +r +w +a +l +g +ɣ +k +̀ +b +n +ŋ +x +d +á + +ù +s +o +p +ɖ +í +ƒ +z +ɔ +_ +̃ +i +à +ũ +v +h +ã +y +ʋ +- +́ +ɛ +è +ú +e +t +ó +m +ì +é +ò +f +' +ẽ +u +ĩ diff --git a/models/eza/G_100000.pth b/models/eza/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bdb8343110fce942b260b5f43e8aae0ef5608ba0 --- /dev/null +++ b/models/eza/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c174877a46c1d6ee36182bbb6b85eec0a3e26a6aa21b827004e54ae9a77afcc8 +size 145489919 diff --git a/models/eza/config.json b/models/eza/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/eza/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/eza/vocab.txt b/models/eza/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e081436818ce598c2679b205d2834f2fd01f87b7 --- /dev/null +++ b/models/eza/vocab.txt @@ -0,0 +1,45 @@ +_ +à +h +n +̀ +í +ò +é +y +ụ +i +u +á +g +a +s +ẹ +ù +c +è +j +z +ó +ị + +ì +d +f +- +ú +b +ọ +m +l +6 +p +' +e +o +r +w +k +v +t +́ diff --git a/models/fal/G_100000.pth b/models/fal/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1b2e0074801d0a22fb1610043279510fd55573c --- /dev/null +++ b/models/fal/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f8e543e792e4edb2a7123092ae836483950bfce27fbee5cd16422fb04670e36 +size 145484634 diff --git a/models/fal/config.json b/models/fal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fal/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fal/vocab.txt b/models/fal/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..51dcb592bfe5ce94927773106e181425e6dfc0a8 --- /dev/null +++ b/models/fal/vocab.txt @@ -0,0 +1,38 @@ +| +a +i +n +e +y +r +m +g +t +u +k +o +w +à +d +s +â +è +b +j +l +p +h +f +î +c +ò +ɗ +û +' +ɓ +- +ê +ù +1 +v + diff --git a/models/fao/G_100000.pth b/models/fao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..92385afd57158c21b3c66de6631121af0e65921e --- /dev/null +++ b/models/fao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db42d4606eabbe502a8424ac76654e22ccbac540a1adb6e90eaaaa66a89189b0 +size 145483767 diff --git a/models/fao/config.json b/models/fao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fao/vocab.txt b/models/fao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4a69847d415993a9264dbb91afea8dbce413ad5a --- /dev/null +++ b/models/fao/vocab.txt @@ -0,0 +1,37 @@ + +z +i +h +m +r +u +k +s +d +v +f +ó +b +– +p +í +e +j +- +y +ø +l +1 +æ +a +o +ú +ð +ý +4 +n +_ +t +7 +á +g diff --git a/models/far/G_100000.pth b/models/far/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..72af65699e388990de5c4237fa1ad8b8910a79cc --- /dev/null +++ b/models/far/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53f4319bf5b617f9570c979227f5f36b3d84b913a8079652dfcbed9a6f8d2f6 +size 145476176 diff --git a/models/far/config.json b/models/far/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/far/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/far/vocab.txt b/models/far/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..73d179dc0f8861206b6cdbf4d8ba1c52482ce4dd --- /dev/null +++ b/models/far/vocab.txt @@ -0,0 +1,27 @@ +b +h +d +p +g +' +c +w +l +n +s +_ +6 +e +o +t +y +f +r +v +i +a +j +u + +k +m diff --git a/models/fas/G_100000.pth b/models/fas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd8acdf05e15c42b67ee98c5230b61bcf406b64f --- /dev/null +++ b/models/fas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed10e401f38c2877a276c24cfafe34b824ed454e1e0713acce468f001178a59 +size 145489111 diff --git a/models/fas/config.json b/models/fas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fas/vocab.txt b/models/fas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ea255d24a66c2e529f3f399d5d0e3191902e52b2 --- /dev/null +++ b/models/fas/vocab.txt @@ -0,0 +1,44 @@ +ل +پ +ع +أ +ئ +ذ +_ +ك +ج +ر +ه +ف +گ +ي +ء +ی +ص +ق +ح +ط +ث +ٔ +ن +م +ظ +ت +ب +و +د +غ + +ش +خ +ک +آ +- +ؤ +چ +ژ +س +' +ا +ض +ز diff --git a/models/fij/G_100000.pth b/models/fij/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc2b637c61ad7f90ebab7122d9842cd999a2769e --- /dev/null +++ b/models/fij/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b408667344070ed54375c8203de9406085a6e462d1472d8fa4a73c23b66d8c27 +size 145483021 diff --git a/models/fij/config.json b/models/fij/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fij/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fij/vocab.txt b/models/fij/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..66e865d76ef80708cc834c094aa7b43048835b3c --- /dev/null +++ b/models/fij/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +k +n +e +u +o +v +t +s +l +r +m +d +g +y +c +q +b +w +p +j +' +f +ʻ +ʼ +- +0 +2 +1 +— +3 +5 +4 + diff --git a/models/fin/G_100000.pth b/models/fin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d7daaeb63981714ad983144e6d55e742d5dc2b7 --- /dev/null +++ b/models/fin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb89c0434f283cf7dbaf6af78902de94b4ab2af0757e95a4be9a95dcc9147884 +size 145477215 diff --git a/models/fin/config.json b/models/fin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fin/vocab.txt b/models/fin/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..68e5a7d851b15f888720851e0c7630235d98f663 --- /dev/null +++ b/models/fin/vocab.txt @@ -0,0 +1,28 @@ +p +g +j +v +y +z +s +ä +b +- +m +h +t + +r +f +' +_ +l +ö +u +n +a +k +d +o +e +i diff --git a/models/flr/G_100000.pth b/models/flr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3de8f7b165afedf7b6344698ee9428d82fa077a --- /dev/null +++ b/models/flr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54498282786abfc7c113545e64c5d926df2454bbea5735220db6665e4ed14dc6 +size 145479917 diff --git a/models/flr/config.json b/models/flr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/flr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/flr/vocab.txt b/models/flr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a8bd516c1d04925f2ec2bf10a45b62e7fbb6f965 --- /dev/null +++ b/models/flr/vocab.txt @@ -0,0 +1,32 @@ +f +ꞌ +â +p +e +ù +n +h +ú +d +w +l +_ + +k +j +z +r +í +y +u +o +g +- +m +t +s +i +b +v +a +á diff --git a/models/fmu/G_100000.pth b/models/fmu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7bbbcee9c6b0c1b5cb66639295c3a35de056f2a3 --- /dev/null +++ b/models/fmu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c13aa340fbe5a18861a5d6ebaf33fda66a31b8ebc190d4b882a4814c98f63abd +size 145493759 diff --git a/models/fmu/config.json b/models/fmu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fmu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fmu/vocab.txt b/models/fmu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..455f3fd6854670c5498ead11abf15775d6976a2c --- /dev/null +++ b/models/fmu/vocab.txt @@ -0,0 +1,50 @@ +ऊ +स +‍ +ि +़ +ी +ओ +य +ळ +अ +घ +ै +_ +इ +ड +ए +ब +म +ू +ल +1 +च +ा +ो +ह +ण +' +श +आ +द +उ +क +प +ज +् +ट +ष +- +ु +े +व +त +ग +भ +ध +ं +न + +ख +र diff --git a/models/fon/G_100000.pth b/models/fon/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3dadfffcb9043fb6707ca03810499e59421bc71f --- /dev/null +++ b/models/fon/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90dc6363951cb268db08d6c7fa325742920be98f4d38d24d1da07c589fca37bc +size 145490681 diff --git a/models/fon/config.json b/models/fon/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fon/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fon/vocab.txt b/models/fon/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..defe146a04635911d072f2511fe725b605fc92e9 --- /dev/null +++ b/models/fon/vocab.txt @@ -0,0 +1,46 @@ +_ +è +ɖ +̌ +ǐ +t +d +- +f +e +ó +ú +é +v +ǎ +g +' + +s +ǔ +k +́ +a +n +h +i +í +b +u +p +m +á +j +đ +l +ǒ +x +ɔ +z +y +c +o +w +ì +ě +ɛ diff --git a/models/fra/G_100000.pth b/models/fra/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..557469051d4247405493b333a6efdf9096643813 --- /dev/null +++ b/models/fra/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63725b5a9201548b2247af02bd69a059335bddf52c1b858dbe38a43a40478bd7 +size 145489135 diff --git a/models/fra/config.json b/models/fra/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/fra/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/fra/vocab.txt b/models/fra/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..54bbe0753eef3fdc5fd52938e68cb9f3ecb209a6 --- /dev/null +++ b/models/fra/vocab.txt @@ -0,0 +1,44 @@ +î +z +- +ù +u +û +y +è +x +q +c +g +ô +ê +_ +e +i +d +b +a +f +œ +h +r +é +k +s +v +à + +– +p +ë +t +n +o +j +ü +â +ç +ï +' +m +l diff --git a/models/frd/G_100000.pth b/models/frd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba5f80cf818abdb3640dd6c4bad6bd6937961a54 --- /dev/null +++ b/models/frd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f9fa6c9350ad77ca23d1353e25429012f5aef5f638b88091e32787880feb25 +size 145477733 diff --git a/models/frd/config.json b/models/frd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/frd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/frd/vocab.txt b/models/frd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3755d0efc17e50217e17b29716611f7a3c26d40b --- /dev/null +++ b/models/frd/vocab.txt @@ -0,0 +1,29 @@ +b +v +o +m +ꞌ +j +e +d +f +z +n +l +p +h +c +w +r +k + +a +i +' +_ +- +y +u +t +s +g diff --git a/models/ful/G_100000.pth b/models/ful/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..af461896275d79cf57d6d1d3de97bb757368b0da --- /dev/null +++ b/models/ful/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0555ae5d6d1ac1c57abfa182f32488b7a72a487b3bc614a193e624303e5fb488 +size 145486823 diff --git a/models/ful/config.json b/models/ful/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ful/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ful/vocab.txt b/models/ful/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..1d4276822594caa858b23c9e87f837542799dd1b --- /dev/null +++ b/models/ful/vocab.txt @@ -0,0 +1,41 @@ +| +a +o +i +e +n +m +u +ɗ +k +d +l +r +y +ɓ +w +t +g +h +' +s +b +j +f +c +p +ƴ +0 +‐ +ŋ +4 +1 +2 +5 +3 +6 +7 +9 +8 +— + diff --git a/models/gag-script_cyrillic/G_100000.pth b/models/gag-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..243b9b7266cdf7aea4911b6980596e9f97d3181d --- /dev/null +++ b/models/gag-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a67b9b898d1b3bd1faebfe68e2b75e3d4788ba8941bd8839865d5f02c2576007 +size 145483015 diff --git a/models/gag-script_cyrillic/config.json b/models/gag-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gag-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gag-script_cyrillic/vocab.txt b/models/gag-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b0fb3b79b64d43d06911cac4152b711726c7228c --- /dev/null +++ b/models/gag-script_cyrillic/vocab.txt @@ -0,0 +1,36 @@ +л +н +с +з +у +ш +ы +о +_ +ӱ +б +ж +д +к +е +п +р +– +г + +ӓ +ӧ +ч +ф +и +й +ц +т +ӂ +в +' +м +- +х +э +а diff --git a/models/gag-script_latin/G_100000.pth b/models/gag-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..978c42907c200a8d330c162bd72e33ab23cf9e52 --- /dev/null +++ b/models/gag-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3cb5191ee36f15a8b1ad0d7ea7a70d3b5ca6397703258748bf7c8c3b8fbd5f +size 145484643 diff --git a/models/gag-script_latin/config.json b/models/gag-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gag-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gag-script_latin/vocab.txt b/models/gag-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5c17fcd05c51554751e5a5744ab36e1a2529e977 --- /dev/null +++ b/models/gag-script_latin/vocab.txt @@ -0,0 +1,38 @@ +| +a +n +і +r +e +l +d +ı +k +s +m +o +ä +u +t +b +z +y +ü +h +ş +g +ç +p +v +c +ö +ê +i +̇ +f +– +- +ţ +j +' + diff --git a/models/gai/G_100000.pth b/models/gai/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3b55fb0bb23afa88321e19af4374c23ccea8f89a --- /dev/null +++ b/models/gai/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da6f1407c647d6d2971a9ecc348c44a74a7ad56155084983dba79a6fcb3aee6 +size 145480709 diff --git a/models/gai/config.json b/models/gai/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gai/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gai/vocab.txt b/models/gai/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7750077184ea0654f4f4f70b06652de66a32af30 --- /dev/null +++ b/models/gai/vocab.txt @@ -0,0 +1,33 @@ +m +5 +t +r +o +7 +g +c +e + +' +ɨ +l +ŋ +_ +0 +k +n +9 +a +i +u +3 +1 +b +8 +w +d +p +6 +s +4 +2 diff --git a/models/gam/G_100000.pth b/models/gam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fa56da94326fc5c692f3c2cfd18a35399cedb8e --- /dev/null +++ b/models/gam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39223539e87b3b0454f5a4e37ed5910046f153d8d31ea050408af6971a88781c +size 145479905 diff --git a/models/gam/config.json b/models/gam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gam/vocab.txt b/models/gam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9da680c9dfa07b18b51634b98c027c2e1de33097 --- /dev/null +++ b/models/gam/vocab.txt @@ -0,0 +1,32 @@ +u +m +k +i +t +1 +g +3 +b +8 +- +_ +r +w +4 +7 +s +j +0 +d +e + +2 +y +o +l +6 +9 +n +a +p +5 diff --git a/models/gau/G_100000.pth b/models/gau/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..768e421150b8e5053cded2d6789fd224c78fd876 --- /dev/null +++ b/models/gau/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5d003ab1bbf26fd4a96607a08bdc96b2262bcf2d6d8baeba6ac18c657134ae +size 145502217 diff --git a/models/gau/config.json b/models/gau/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gau/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gau/vocab.txt b/models/gau/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ab7679fa409f3bf57a417cb156a859546c202e7c --- /dev/null +++ b/models/gau/vocab.txt @@ -0,0 +1,61 @@ +ఔ +ణ +మ +ఒ +ఓ +హ +త + +చ +ఫ +ల +ష +బ +ఉ +ఐ +డ +గ +ఎ +ఖ +ూ +న +ద +భ +‍ +ా +క +ౌ +ృ +ే +ప +ె +ఞ +ు +ఆ +ీ +ఘ +్ +ి +ధ +' +ట +య +జ +ఏ +ఇ +ఈ +ః +_ +శ +ళ +ో +థ +ం +స +ఊ +ొ +ర +ఠ +వ +అ +ై diff --git a/models/gbi/G_100000.pth b/models/gbi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9388a9ec5491848590ae2433ad9e19bdefc119f3 --- /dev/null +++ b/models/gbi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82358611950e3f4bcee85f50b5fd61af865d53875a61fd0ce9add0ca6129840d +size 145481567 diff --git a/models/gbi/config.json b/models/gbi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gbi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gbi/vocab.txt b/models/gbi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0b3241de4606b75c6701affb781f91ad6cfbb9bb --- /dev/null +++ b/models/gbi/vocab.txt @@ -0,0 +1,34 @@ +| +a +o +i +n +e +g +m +k +s +u +d +l +w +t +r +b +y +h +p +ḋ +c +j +f +- +' +z +4 +0 +1 +2 +5 +6 + diff --git a/models/gbk/G_100000.pth b/models/gbk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d37976e18e5805ce0d1dc92b5d17dda2cae32a6 --- /dev/null +++ b/models/gbk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe25d5233279ce356322f570fc076f7b41a44d683bb17be90ccd4e4386b3a8bb +size 145506025 diff --git a/models/gbk/config.json b/models/gbk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gbk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gbk/vocab.txt b/models/gbk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a78f4e5901c786c8bb4981db863c98f7376eda0e --- /dev/null +++ b/models/gbk/vocab.txt @@ -0,0 +1,66 @@ +ञ +ओ +ब +े +त +' +औ +ू +इ +ऊ +र +ळ +ड +आ +घ +प +फ +ौ +ि +ल +द +6 +न +अ +य +ै +ढ +– +- +स +व +ग +क +उ +च +ष +ट +ऑ +भ +ऐ +ण +छ +‍ +झ +् +थ +ँ +ध +ङ +ह +़ +ो +ी +श +ख +ृ +ा +ज +ए +ं +म +ठ +ु +_ +ई + diff --git a/models/gbm/G_100000.pth b/models/gbm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a54e2b435f5bac1f1ee893fb393c1be39819cdf --- /dev/null +++ b/models/gbm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d728c88b78cc7980e70bc3fad343f60e2bed03519924a2bcabfd57b953dffce3 +size 145505265 diff --git a/models/gbm/config.json b/models/gbm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gbm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gbm/vocab.txt b/models/gbm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6a0f1e2b3d5b65e6ca5e17c7f984570eac28dafe --- /dev/null +++ b/models/gbm/vocab.txt @@ -0,0 +1,65 @@ +| +ा +र +क +ि +ं +म +ु +् +त +ी +ल +न +ै +प +व +े +द +ब +स +ो +य +ज +ह +अ +श +च +ण +ग +ट +छ +भ +ख +ू +ड +़ +ौ +उ +ऊ +आ +इ +ध +फ +ए +थ +ठ +ई +ष +- +ढ +झ +घ +ऐ +औ +ँ +ञ +ओ +ृ +ः +ळ +‍ +5 +0 +9 + diff --git a/models/gbo/G_100000.pth b/models/gbo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e1d7e971b1103ee7b6aedd58981fe1c332e27b11 --- /dev/null +++ b/models/gbo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46b417cb6b07146b41a1d25bc956902dd8e8be85204b78ee46d5e87789ed435 +size 145489918 diff --git a/models/gbo/config.json b/models/gbo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gbo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gbo/vocab.txt b/models/gbo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c626a5f4e2b760906bf5ff22398160b113f18792 --- /dev/null +++ b/models/gbo/vocab.txt @@ -0,0 +1,45 @@ +ǎ +p +a + +s +w +m +ï +ä +e +d +j +ɕ +h +n +i +̀ +ü +g +y +_ +b +c +ǒ +k +t +ì +à +è +ɛ +u +' +ɔ +- +o +l +ě +ò +č +f +ù +̌ +ǔ +̈ +ǐ diff --git a/models/gde/G_100000.pth b/models/gde/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..99c1aff92e529fee3c8ac2fa1ae9064f5903af71 --- /dev/null +++ b/models/gde/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d761820e6c9333d8fff58d73a32b0b979817d3d13f9dd6bcc9f1c188fe82390f +size 145484557 diff --git a/models/gde/config.json b/models/gde/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gde/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gde/vocab.txt b/models/gde/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f580ea1a150d486e017f0150be3200c9c5ab9320 --- /dev/null +++ b/models/gde/vocab.txt @@ -0,0 +1,38 @@ +| +a +ə +i +n +k +t +u +m +s +g +d +ŋ +y +h +o +c +w +' +l +e +r +p +b +ɗ +j +z +f +v +ɓ +- +1 +2 +8 +5 +4 +7 + diff --git a/models/geb/G_100000.pth b/models/geb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..49ad59ab3f1464466a8ba71b0c56f6f00426e29e --- /dev/null +++ b/models/geb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b728258a65d0683574c4a2371f83ed9cf18bd10d7e428ef3700693365de8d24 +size 145481559 diff --git a/models/geb/config.json b/models/geb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/geb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/geb/vocab.txt b/models/geb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e7815670a32ab40a64827db9d7854f206a03a24c --- /dev/null +++ b/models/geb/vocab.txt @@ -0,0 +1,34 @@ +| +a +i +n +g +m +u +r +e +h +b +à +k +v +ç +z +t +d +s +p +o +f +w +å +0 +1 +' +2 +5 +3 +4 +9 +8 + diff --git a/models/gej/G_100000.pth b/models/gej/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c4c4def8eb4d000b8c8c56581061d714d40cf13 --- /dev/null +++ b/models/gej/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af242f9e8ede783d263d494760a437ad50df5887f58169776728073c409df2a0 +size 145489901 diff --git a/models/gej/config.json b/models/gej/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gej/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gej/vocab.txt b/models/gej/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..765b7333d011cc3933b42da809e8abfccb2e6919 --- /dev/null +++ b/models/gej/vocab.txt @@ -0,0 +1,45 @@ +| +a +e +o +è +ɔ +n +m +̀ +u +k +ɛ +b +w +i +l +à +y +- +t +s +ɖ +g +̃ +ì +j +ù +ò +ŋ +v +d +p +ã +f +r +h +x +z +c +ũ +đ +ĩ +ƒ +í + diff --git a/models/gil/G_100000.pth b/models/gil/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ba6d22a6daed72c72a185574b089f508067c2fc --- /dev/null +++ b/models/gil/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a1eb530ef21a9f1adba42bd281a08b7ec2c2b9a6f67139ffe5198385e32eb7 +size 145476065 diff --git a/models/gil/config.json b/models/gil/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gil/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gil/vocab.txt b/models/gil/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..79c361131a1ce7bc8ab75b0173818e14a9d79f41 --- /dev/null +++ b/models/gil/vocab.txt @@ -0,0 +1,27 @@ +5 +1 +b +2 +p +t +w +u +7 +0 +a +k +q +g +s +r +- +4 +n +m + +o +_ +' +e +3 +i diff --git a/models/gjn/G_100000.pth b/models/gjn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fecc3efa361513d307a8d75a58ee485ae1617d0b --- /dev/null +++ b/models/gjn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5c511b5f248152c38aacbe7a1002e555a4acd10bf6218feca95bf427151c283 +size 145479163 diff --git a/models/gjn/config.json b/models/gjn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gjn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gjn/vocab.txt b/models/gjn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d605491c9e29a44eb701338b655a379d2562b7b0 --- /dev/null +++ b/models/gjn/vocab.txt @@ -0,0 +1,31 @@ +| +a +n +e +b +ɛ +k +o +m +i +s +ŋ +u +l +r +ɔ +t +y +h +p +d +w +f +g +j +c +z +v +' +- + diff --git a/models/gkn/G_100000.pth b/models/gkn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3fe11a71b15be825d9a797b3bd24c035a6326007 --- /dev/null +++ b/models/gkn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b74ef3ba0c5d6d33b842cba3f95f0d6fe9ff42a3c5f98b63153317cb96e241d +size 145497585 diff --git a/models/gkn/config.json b/models/gkn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gkn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gkn/vocab.txt b/models/gkn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0a597aaf7abdef0881b8839240d25f99c04eb36d --- /dev/null +++ b/models/gkn/vocab.txt @@ -0,0 +1,55 @@ +| +b +e +n +a +l +á +à +k +g +ọ +è +o +ó +é +m +í +d +t +i +ò +̀ +ẹ +v +ì +́ +s +p +ú +u +r +y +z +õ +ã +ù +ẽ +ĩ +ṍ +j +f +̄ +̃ +ń +ʽ +ḿ +h +- +ṹ +' +ũ +ʻ +ǹ +ỳ + diff --git a/models/gld/G_100000.pth b/models/gld/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..20cd152484c819f0ea0a31e2bf983ade46e786cc --- /dev/null +++ b/models/gld/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7d9258bc16a83de04bb9bdd3aa36e05f6cfc891f1e92156a753fe40d19b6d0 +size 145481445 diff --git a/models/gld/config.json b/models/gld/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gld/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gld/vocab.txt b/models/gld/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f59709e48ae8086c075fea32ed0b478c1e07c865 --- /dev/null +++ b/models/gld/vocab.txt @@ -0,0 +1,34 @@ +| +и +а +э +н +д +у +с +о +м +ч +л +б +р +т +й +г +х +к +в +ӈ +ё +п +- +я +е +ю +ф +– +з +ь +' +ц + diff --git a/models/glk/G_100000.pth b/models/glk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..81b26773d6de6ddc3bc5cacb871b14dee39aa887 --- /dev/null +++ b/models/glk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac5ab93a1d8dc577b3ed32b938ef47fed4279fadba82988c056606921ae21ea +size 145485281 diff --git a/models/glk/config.json b/models/glk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/glk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/glk/vocab.txt b/models/glk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbdb33bb48c35b7cadeba9378e4bacf5e710e64c --- /dev/null +++ b/models/glk/vocab.txt @@ -0,0 +1,39 @@ +ٚ +م +س +_ +ص +ؤ +د +ز +غ + +گ +پ +ئ +ظ +ث +و +ذ +ق +آ +ا +خ +ت +ب +ک +ج +ش +ع +ل +ض +ط +- +چ +أ +ه +ر +ف +ح +ن +ی diff --git a/models/gmv/G_100000.pth b/models/gmv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..16ce03d542ec3fc78b6bc1b47d434303dc9a0b61 --- /dev/null +++ b/models/gmv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb01f8b821e1f610dcec7cb35c74affde67d51f1d4aa7f2ceb30b613d7a4382 +size 145479141 diff --git a/models/gmv/config.json b/models/gmv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gmv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gmv/vocab.txt b/models/gmv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..db5db51bbca217466cc3bbb9e58e3ce4887ee2ed --- /dev/null +++ b/models/gmv/vocab.txt @@ -0,0 +1,31 @@ +a +| +i +s +e +t +o +n +d +h +y +k +g +m +l +u +z +r +b +p +q +w +x +c +f +7 +j +- +2 +1 + diff --git a/models/gna/G_100000.pth b/models/gna/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..be39f6d8caebce62ae10cf988f1dd26df38cb49d --- /dev/null +++ b/models/gna/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21673e1edc97e712e0e8aa5cda1231038c08a414045e5455fd504767314c824d +size 145500674 diff --git a/models/gna/config.json b/models/gna/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gna/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gna/vocab.txt b/models/gna/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e229eaf9d83b1b00d319542fbae040f8adf8f7a7 --- /dev/null +++ b/models/gna/vocab.txt @@ -0,0 +1,59 @@ +l +ó +z +ŋ +ɩ +ĩ +n + +õ +j +f +i +v +á +b +ʋ +k +– +ě +u +́ +û +r +ɔ +g +ɛ +̌ +̂ +e +ǎ +é +ô +h +̃ +c +ǐ +w +í +a +î +m +_ +ṹ +o +s +ũ +ú +- +â +ǒ +y +d +ê +ṍ +ɲ +ã +p +t +' diff --git a/models/gnd/G_100000.pth b/models/gnd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0eceba90408def0f210af9c1a8cbe7d4e2cae3fa --- /dev/null +++ b/models/gnd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d44de5d60dac9be1e7617e340910f81482e3053ed3699aaa984befcf7f91cff +size 145482347 diff --git a/models/gnd/config.json b/models/gnd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gnd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gnd/vocab.txt b/models/gnd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..87c47bdc15179998baeee9a079d2f4713f849fdf --- /dev/null +++ b/models/gnd/vocab.txt @@ -0,0 +1,35 @@ +| +a +m +k +à +t +e +r +i +ə +g +u +l +s +d +n +b +w +y +ŋ +z +̀ +ɗ +ì +h +è +v +f +p +ù +ɓ +' +o +ɂ + diff --git a/models/gng/G_100000.pth b/models/gng/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..62a61a55ea4a3002101e0d09069895a1c491af91 --- /dev/null +++ b/models/gng/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b52e8e0b801c6019ecba4b5aa587cfd86e64560b3b00a038b7a3a8c75ca825e +size 145489154 diff --git a/models/gng/config.json b/models/gng/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gng/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gng/vocab.txt b/models/gng/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b27c88a73d35b2c13a7e77d775d9df07ef425f73 --- /dev/null +++ b/models/gng/vocab.txt @@ -0,0 +1,44 @@ +| +n +i +a +b +ɛ +e +k +u +ɔ +t +l +y +m +o +' +w +r +s +d +h +à +ʼ +p +í +j +g +ŋ +c +f +ń +ì +ñ +̀ +é +ù +è +ó +á +- +́ +ò +ú + diff --git a/models/gof-script_latin/G_100000.pth b/models/gof-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..271368dcd0da3df136adb4aaba15e5c7d4af768e --- /dev/null +++ b/models/gof-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb53a2b8f353f8d4b317c3c22450b941aa42c905f9d1824e4c647816c15788f +size 145477601 diff --git a/models/gof-script_latin/config.json b/models/gof-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gof-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gof-script_latin/vocab.txt b/models/gof-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4583adbe868976cae5f99e08469291ab17014092 --- /dev/null +++ b/models/gof-script_latin/vocab.txt @@ -0,0 +1,29 @@ +a +| +i +s +e +n +o +t +h +y +d +k +u +g +m +b +l +r +w +7 +p +q +x +c +f +z +j +- + diff --git a/models/gog/G_100000.pth b/models/gog/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..07411dc98467b704738272e5cae85343f92bd5ea --- /dev/null +++ b/models/gog/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f97a49df7b3f128eef109da6b25ffadb14d20c2e1b9f83651ef21f01093b209 +size 145475951 diff --git a/models/gog/config.json b/models/gog/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gog/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gog/vocab.txt b/models/gog/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a11f9b15530e428d5359aacef0cf176ed236f2e3 --- /dev/null +++ b/models/gog/vocab.txt @@ -0,0 +1,27 @@ +| +a +u +i +n +o +w +e +l +m +y +k +h +g +z +s +t +c +b +v +j +d +p +' +f +- + diff --git a/models/gor/G_100000.pth b/models/gor/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..44deae6e845142c70fc965a23532cdbc6a814efe --- /dev/null +++ b/models/gor/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08805f83aa6ba7fed49617078db2b4d45181e35c3c379b8009269049b4cbbc07 +size 145476071 diff --git a/models/gor/config.json b/models/gor/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gor/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gor/vocab.txt b/models/gor/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8036a1b204bb328f37c52426d30b3e64bdce2ac3 --- /dev/null +++ b/models/gor/vocab.txt @@ -0,0 +1,27 @@ +| +o +a +l +i +u +t +m +w +n +y +e +g +h +d +b +p +s +' +r +k +- +j +c +z +f + diff --git a/models/gqr/G_100000.pth b/models/gqr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9cfddeefd4ec35b5a476510a852b20c8b35692bb --- /dev/null +++ b/models/gqr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c34b091499cdd868a7af1d306470a082e001e1cf7b38f6b96dd33be234eaeca +size 145477639 diff --git a/models/gqr/config.json b/models/gqr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gqr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gqr/vocab.txt b/models/gqr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aef5a81d31a84b6e3dc31376742ef36d250a294e --- /dev/null +++ b/models/gqr/vocab.txt @@ -0,0 +1,29 @@ +e +h + +ɨ +k +' +p +r +u +ɓ +j +ɔ +i +o +- +n +a +w +ɗ +s +g +_ +d +t +l +m +b +y +ə diff --git a/models/grc/G_100000.pth b/models/grc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e7a5b0825fdb529fff1507fefc65ac90cb799f8 --- /dev/null +++ b/models/grc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ed883a27515b6d7cff73e9e637bcbe061af6f5791f69f6583750b6fa4975217 +size 145546735 diff --git a/models/grc/config.json b/models/grc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/grc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/grc/vocab.txt b/models/grc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f0ae19e6f1f18b5da8109794d14f778b25ccb5f8 --- /dev/null +++ b/models/grc/vocab.txt @@ -0,0 +1,119 @@ +ἤ +ὡ +ρ +ὗ +ᾧ +ψ +ἶ +ἄ +ὢ +θ +ὑ +– +ά +ῇ +π +έ +ἢ +ᾠ +ᾅ +η +γ +- +ξ +ς +κ +ὒ +μ +ΰ +ὓ +ὠ +ὼ + +ἔ +ὸ +ὶ +ὄ +ᾶ +ΐ +ᾳ +λ +υ +ἁ +ὔ +ὅ +τ +ἧ +β +ἓ +ἆ +ᾗ +ᾑ +ῆ +ἱ +ἐ +ἀ +χ +σ +ἅ +ᾄ +ἦ +ἑ +ἷ +ῒ +ῄ +ῳ +ῖ +ῷ +ὁ +ῥ +φ +ή +ὰ +ύ +ζ +ὧ +ὤ +ἂ +ο +ἣ +ἰ +ὖ +' +ῶ +ό +ἳ +ἕ +ἠ +δ +ἃ +ῴ +ί +ι +ἡ +ὐ +ϋ +ὀ +ε +ὃ +ᾖ +ν +ὥ +ῦ +α +ϊ +ᾔ +ὴ +ω +ἵ +ᾐ +ἴ +ὕ +ἥ +ὲ +ὦ +ώ +ᾷ +ῃ +ὺ +_ diff --git a/models/gri/G_100000.pth b/models/gri/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d945dd6253ac3db7e849348d013647b31d6cb77b --- /dev/null +++ b/models/gri/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe65ade526c237e04bb92adf67f670830a8c13709225ffdb4cbc3adb1a92752e +size 145478353 diff --git a/models/gri/config.json b/models/gri/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gri/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gri/vocab.txt b/models/gri/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b06a2dce38f45ccae938b583d5a6f613537ababc --- /dev/null +++ b/models/gri/vocab.txt @@ -0,0 +1,30 @@ +i +a +_ +6 +k +– +r +u +p +l +s +t +1 +m +b +d +e +v +g +j +h +n +o +4 +2 +- +' +q + +0 diff --git a/models/grn/G_100000.pth b/models/grn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6efb6feb7722e713d068dc32a443e03e06cd3f98 --- /dev/null +++ b/models/grn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1c7674f1c95a4748391772778acf596b8eda6bdbcd2db0cb7f493ca8c448dc1 +size 145496030 diff --git a/models/grn/config.json b/models/grn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/grn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/grn/vocab.txt b/models/grn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3e48853adc1b87cc02d5fb122a63bfd23d3d8df6 --- /dev/null +++ b/models/grn/vocab.txt @@ -0,0 +1,53 @@ +3 +ñ +m +l +ỹ +ũ +h +k +6 +8 +a + +ẽ +v +ý +d +7 +p +n +q +ã +4 +b +1 +0 +ú +9 +f +— +c +j +y +ó +í +é +_ +r +- +e +u +t +5 +2 +i +á +g +' +z +õ +ĩ +o +x +s diff --git a/models/grt/G_100000.pth b/models/grt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef360414e7de7e5798b44306c6378f164dade855 --- /dev/null +++ b/models/grt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eb0d47deef1d874c7adbd6130a45064e6eb13754f8b9d3d7ca144b92db3e891 +size 145503617 diff --git a/models/grt/config.json b/models/grt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/grt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/grt/vocab.txt b/models/grt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..898b1f7d796e1879a3e6c14aed46b4162a833065 --- /dev/null +++ b/models/grt/vocab.txt @@ -0,0 +1,63 @@ +| +া +ি +ন +ং +্ +ক +ম +ব +র +স +ো +আ +খ +গ +দ +ে +জ +য +় +ল +চ +ত +ু +ই +হ +ও +থ +প +উ +ফ +ী +এ +ৎ +শ +ষ +ট +ূ +অ +- +ৌ +ড +' +ণ +ছ +ধ +ৈ +ঠ +ঘ +ঃ +ভ +ঝ +ঐ +ঙ +ঊ +1 +ঈ +ঁ +s +l +d +a + diff --git a/models/gso/G_100000.pth b/models/gso/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..75ca6136f307c892a4ea29b3407a020b898f9ed8 --- /dev/null +++ b/models/gso/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a35eced5a17e8155b80c74d11e9287c74cd7dc6d79b816510c706d6812fc13 +size 145483009 diff --git a/models/gso/config.json b/models/gso/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gso/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gso/vocab.txt b/models/gso/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4e32dda1055e6e29a5516e5231098a708971277 --- /dev/null +++ b/models/gso/vocab.txt @@ -0,0 +1,36 @@ +| +a +n +ɛ +i +m +ɔ +g +e +h +k +o +w +s +b +t +d +u +l +p +z +y +r +f +- +̂ +j +é +c +ï +v +q +x +è +ë + diff --git a/models/gub/G_100000.pth b/models/gub/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b510fc10b5f259e177f7766938762e9cd222e2c3 --- /dev/null +++ b/models/gub/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d53e68f9d019d4b2796c723de8496b3278df1d78f284d5a281bed3001f32e10d +size 145479923 diff --git a/models/gub/config.json b/models/gub/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gub/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gub/vocab.txt b/models/gub/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1f854ffec80612affef65fb53ffd705450852aef --- /dev/null +++ b/models/gub/vocab.txt @@ -0,0 +1,32 @@ +à +8 +4 +t +- +k +p +u +1 +0 +x +i +z +3 +_ +a +e +m +' +6 +n +5 +w +g +r +h +o +7 + +9 +2 +y diff --git a/models/guc/G_100000.pth b/models/guc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..14cded9470c2f83c72c4a79f93ff30941a3591c9 --- /dev/null +++ b/models/guc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dda7e7d7a42c36f19bef4e970825233ab621b9774904e3bb2af13783ac794317 +size 145489801 diff --git a/models/guc/config.json b/models/guc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guc/vocab.txt b/models/guc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..543e09376464ee58e1e5a90a286ea22a362b3dfa --- /dev/null +++ b/models/guc/vocab.txt @@ -0,0 +1,45 @@ +a +| +n +i +ü +e +s +t +k +u +j +l +m +' +o +h +w +p +y +r +c +d +ú +— +b +í +é +g +f +á +ó +0 +v +z +q +1 +2 +x +5 +4 +ñ +3 +6 +7 + diff --git a/models/gud/G_100000.pth b/models/gud/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..72df32142edce08c5ce5b6f5441fb7398e5d5b16 --- /dev/null +++ b/models/gud/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745fc6a2c34a68e9eb33aea3e10ac46715d449461baaf05fed441b8dab819290 +size 145480691 diff --git a/models/gud/config.json b/models/gud/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gud/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gud/vocab.txt b/models/gud/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e3641fca4d983ae3467c204c94ab93b39929430a --- /dev/null +++ b/models/gud/vocab.txt @@ -0,0 +1,33 @@ +| +a +l +n +ɩ +' +ɔ +‐ +y +ɛ +k +ʋ +i +b +m +e +t +g +c +o +h +p +s +u +w +z +d +f +ŋ +j +v +0 + diff --git a/models/guh/G_100000.pth b/models/guh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d87aed5f43de0612ef77e850be38f38b20b706ec --- /dev/null +++ b/models/guh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9070bda53ba155f1e9fd7ac5bbd36f5e3137bac6c4be541aa595f3c75997fa +size 145489898 diff --git a/models/guh/config.json b/models/guh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guh/vocab.txt b/models/guh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d435da392f4b5e83b8cd6b9bd8314f4e24fb51a8 --- /dev/null +++ b/models/guh/vocab.txt @@ -0,0 +1,45 @@ +j +4 +s +y +ó +í +d +_ +- +— + +l +z +' +b +á +h +r +̱ +v +u +m +ë +g +ñ +ú +7 +q +o +­ +t +i +ĩ +õ +c +a +é +n +f +e +w +p +k +x +ã diff --git a/models/guj/G_100000.pth b/models/guj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4388ca95e7c36800d610069f3cf7c0f05e65a055 --- /dev/null +++ b/models/guj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:427ac3c74f61be494b389cae7d771311d0bcf576f4e2f1b22f257539e26e323a +size 145501427 diff --git a/models/guj/config.json b/models/guj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guj/vocab.txt b/models/guj/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..af5307892fbac94cc8c58520d36d960662c63003 --- /dev/null +++ b/models/guj/vocab.txt @@ -0,0 +1,60 @@ +| +ા +ે +ત +ન +ર +્ +મ +ી +ક +ં +ો +પ +વ +ુ +સ +ય +હ +જ +થ +િ +શ +ણ +છ +લ +દ +આ +ઓ +અ +એ +ઈ +બ +ગ +ખ +ધ +ભ +ટ +ળ +ૂ +ચ +ડ +ષ +ઉ +ફ +ઘ +ઠ +ૃ +' +ઊ +ઇ +ઞ +ઝ +ઢ +ૈ +ઃ +ૌ +- +ઋ +ઐ + diff --git a/models/guk/G_100000.pth b/models/guk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..319ae5fd8e75ca5d1baed3d51a283dddf875b4e6 --- /dev/null +++ b/models/guk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a31b5b94a89e7ad1309c6575321c34ebf031b5766f78bac771d6e3e378c925c4 +size 145476741 diff --git a/models/guk/config.json b/models/guk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..993d1dedb1d0c8e820b98f9e2f019ff166327038 --- /dev/null +++ b/models/guk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.uroman", + "validation_files": "dev.uroman", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guk/vocab.txt b/models/guk/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..09c467a4acb08779bb0152c2c415e9333fb653c4 --- /dev/null +++ b/models/guk/vocab.txt @@ -0,0 +1,28 @@ + +' +- +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +w +x +y +z diff --git a/models/gum/G_100000.pth b/models/gum/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cadec366d506dcc258b460fbace28449d1fcb21a --- /dev/null +++ b/models/gum/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15daa1d32b38e5891e23059fc95d05a9b905701e00e0e059beef02924b568073 +size 145488372 diff --git a/models/gum/config.json b/models/gum/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gum/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gum/vocab.txt b/models/gum/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5d1190b68dc95ca6f0d3fe7b80e92d7d866369df --- /dev/null +++ b/models/gum/vocab.txt @@ -0,0 +1,43 @@ +| +a +i +r +n +e +ø +u +g +m +b +t +s +k +h +c +l +w +p +y +d +o +ñ +ú +j +— +í +z +é +á +' +f +v +- +ó +q +0 +x +1 +2 +4 +3 + diff --git a/models/guo/G_100000.pth b/models/guo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..55c3dbebfc916b00bc06b09cbf2bef6587ccddf7 --- /dev/null +++ b/models/guo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21bf310928cdad9642a9276e9ac4f0b8760227c1c4e2d7b518f92a3bd2511293 +size 145484527 diff --git a/models/guo/config.json b/models/guo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guo/vocab.txt b/models/guo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d562c9c2b43ed0d90ef20e60c88b1cdc8e7f6b8 --- /dev/null +++ b/models/guo/vocab.txt @@ -0,0 +1,38 @@ +a +| +e +j +i +n +t +l +o +x +s +w +p +m +ʉ +c +h +k +b +d +- +u +r +f +' +y +ú +— +í +g +é +á +v +ó +z +q +ñ + diff --git a/models/guq/G_100000.pth b/models/guq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fad73ea6950eab0bdf02d0be2b07976c8519161e --- /dev/null +++ b/models/guq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2939e2bf5178b3d11d96127b6eb3781bad44b46c7bf89602484639622b44030b +size 145490693 diff --git a/models/guq/config.json b/models/guq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guq/vocab.txt b/models/guq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7bf01d157b12272d04b0e43c215aad24557799de --- /dev/null +++ b/models/guq/vocab.txt @@ -0,0 +1,46 @@ +g +0 +l +u +e +o +i +ẽ +s +_ +j +m +f +­ +k +á +ó +í +x +a +— +b +z + +' +c +é +̃ +d +q +– +ũ +ñ +n +1 +5 +h +õ +w +ã +r +t +y +p +ĩ +2 diff --git a/models/guu/G_100000.pth b/models/guu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e859063afb7669f96fcc3bcfbde2f8038b0f8f3c --- /dev/null +++ b/models/guu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b075053f9fc9b28bd6022b35a0a1ec78d2fa9727fe9ef72894da6894dec61247 +size 145490540 diff --git a/models/guu/config.json b/models/guu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/guu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/guu/vocab.txt b/models/guu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d463b75101a3ab09bf585723eb4af0abfeec0481 --- /dev/null +++ b/models/guu/vocab.txt @@ -0,0 +1,46 @@ +' +r +8 +ä +̦ +s +— +q +a +g +9 +2 +1 + +c +b +5 +7 +ó +n +y +p +í +á +ú +3 +e +4 +ö +_ +l +i +f +j +6 +t +u +0 +h +é +o +m +ñ +v +w +d diff --git a/models/gux/G_100000.pth b/models/gux/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b313ae2930d60f609602dba8bbf02d091e086de7 --- /dev/null +++ b/models/gux/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e590a06f66217a9ae8dcd254bd9ed575bc957f60a7d5f0e9a5d0dae2a808c11b +size 145476087 diff --git a/models/gux/config.json b/models/gux/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gux/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gux/vocab.txt b/models/gux/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..db3767d5cc505e1a59391fadc4f3a6a654f13192 --- /dev/null +++ b/models/gux/vocab.txt @@ -0,0 +1,27 @@ +| +a +i +n +e +l +u +o +b +d +k +y +m +t +g +p +s +j +c +f +w +ŋ +ñ +h +- +v + diff --git a/models/gvc/G_100000.pth b/models/gvc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..610d55e15e7d19dc80e396d982ef1cd586dad5a5 --- /dev/null +++ b/models/gvc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c49ee437d20424b0ddcaa73e4b2cf16cabb58dc979f9197335c01fe38c06614 +size 145488471 diff --git a/models/gvc/config.json b/models/gvc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gvc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gvc/vocab.txt b/models/gvc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..81c9f7fb60a8258b018764f953be1fb64414fdc9 --- /dev/null +++ b/models/gvc/vocab.txt @@ -0,0 +1,43 @@ +| +a +i +r +u +o +h +e +̶ +t +n +c +m +s +j +y +p +ã +d +w +b +ñ +ũ +g +í +õ +— +l +ĩ +ó +q +f +ẽ +v +é +z +' +á +ú +k +à +x + diff --git a/models/gvl/G_100000.pth b/models/gvl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6c99cdf4d26e2f738f4b40ed3c3d793da0b66bc --- /dev/null +++ b/models/gvl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27886ffecabcfdf869cfd09f811f5ab935da41b21f0472efbee8878518ebf44d +size 145496031 diff --git a/models/gvl/config.json b/models/gvl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gvl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gvl/vocab.txt b/models/gvl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5892a1ccfa4ead392b6b485c208acd73ab64dc78 --- /dev/null +++ b/models/gvl/vocab.txt @@ -0,0 +1,53 @@ +| +e +n +a +k +i +d +t +g +o +r +u +m +l +' +è +s +j +é +b +ɔ +ɓ +- +à +w +y +á +ê +p +ò +̂ +h +– +c +ɗ +ó +ú +v +z +ĵ +q +ḿ +̀ +f +x +ŝ +ù +ĝ +š +í +́ +ń + diff --git a/models/gwi/G_100000.pth b/models/gwi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a49027a2dcce26f641d468cff40001c73de07c3a --- /dev/null +++ b/models/gwi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b51e2e8bed65fee88bbf3a6959da3d373ed8fbe40b5ef4d70ba080db17d5dc8b +size 145490685 diff --git a/models/gwi/config.json b/models/gwi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gwi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gwi/vocab.txt b/models/gwi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..70be38b62e3722d67b49fbc54cfe1a3368758602 --- /dev/null +++ b/models/gwi/vocab.txt @@ -0,0 +1,46 @@ +0 +4 +9 +ų +p +8 +v +2 +3 +c +j +' +a +į +ą +q +f +r +- +k +t +l +i +s +_ +g +o +u + +d +ǫ +w +ł +b +7 +n +x +y +z +m +5 +6 +1 +ę +e +h diff --git a/models/gwr/G_100000.pth b/models/gwr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c8b1b38c263259cff424a46f76363056e077e531 --- /dev/null +++ b/models/gwr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfca5d3ce3017d03fb62986d09d2f6ee47952c6402338352f3275d301aa16420 +size 145485297 diff --git a/models/gwr/config.json b/models/gwr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gwr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gwr/vocab.txt b/models/gwr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..49422de6fee9adabf4eaba37548c9791841ea92d --- /dev/null +++ b/models/gwr/vocab.txt @@ -0,0 +1,39 @@ +o +f +y +5 +e +g +4 +a +k +c +n +9 +j +w +0 +z +ŋ +8 +p +m +s +_ +1 +3 +ʼ +6 + +- +u +t +7 +' +r +d +i +v +b +l +2 diff --git a/models/gym/G_100000.pth b/models/gym/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..87849ab22fbe5577f7ae3c5d4f76e9313a962258 --- /dev/null +++ b/models/gym/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e1cdd38ebd3006474d5e2732b55cdf9f41345c086b38f8b2bdd24a38552f751 +size 145492303 diff --git a/models/gym/config.json b/models/gym/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gym/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gym/vocab.txt b/models/gym/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..345cc44b3d4519670c705be36bbbfcc7c6a720a5 --- /dev/null +++ b/models/gym/vocab.txt @@ -0,0 +1,48 @@ +| +e +a +n +k +i +r +t +ä +b +m +w +u +o +ö +g +d +j +y +s +ü +ñ +l +p +c +é +h +í +f +ó +v +á +z +q +1 +2 +0 +ú +6 +8 +4 +5 +3 +- +9 +x +7 + diff --git a/models/gyr/G_100000.pth b/models/gyr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9af63161cbaa7fe0b1b1bd40f0c15a439dcb1e1c --- /dev/null +++ b/models/gyr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9977d698524ad9aee6af8e31cbd1dd2cefcca876ad2ea6a15e11afd6fce2578b +size 145489159 diff --git a/models/gyr/config.json b/models/gyr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/gyr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/gyr/vocab.txt b/models/gyr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6e4cbb8c1c2789701a0c061da489590ec5b5851a --- /dev/null +++ b/models/gyr/vocab.txt @@ -0,0 +1,44 @@ +| +e +a +o +r +i +p +s +v +u +ɨ +k +' +t +m +y +n +b +w +d +ä +g +c +ñ +h +ë +ü +j +̇ +l +ö +ú +ï +í +– +- +é +f +ó +á +z +q +x + diff --git a/models/had/G_100000.pth b/models/had/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..93d0068d9a09e8b87bb9354e4e836830597b9126 --- /dev/null +++ b/models/had/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3b22241c53085eed0db946d8a63b3370fa53f6c200bfc84e35f128b86345036 +size 145476103 diff --git a/models/had/config.json b/models/had/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/had/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/had/vocab.txt b/models/had/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..153f0678ff226c3e0c42ee0ac39c5c5d4431165a --- /dev/null +++ b/models/had/vocab.txt @@ -0,0 +1,27 @@ +d +r +c +s +y +b +z +a +h +l +m +i +n +k +_ +- +o +g +' +p +t +w +u + +j +e +f diff --git a/models/hag/G_100000.pth b/models/hag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d5b8e3211e2e3d4b91c960992ef2e765af01afd --- /dev/null +++ b/models/hag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1552b2efd1d83aec8f22d1c9b72518cdc2022918d2f5f16f0474a32bfdad671 +size 145478398 diff --git a/models/hag/config.json b/models/hag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hag/vocab.txt b/models/hag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4dc813bb5e48df8bb21c52428610d7368471f3d7 --- /dev/null +++ b/models/hag/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +n +e +u +s +m +b +o +y +r +k +l +t +ŋ +d +p +g +z +w +h +j +c +f +v +q +` +- + diff --git a/models/hak/G_100000.pth b/models/hak/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5235439107a5fa88535926eafd6cb5c3449f78aa --- /dev/null +++ b/models/hak/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99177590fb2b66b9d73e6a9e19829e14ccf0bc0fa912513f319af6788d6cf5d2 +size 145488379 diff --git a/models/hak/config.json b/models/hak/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hak/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hak/vocab.txt b/models/hak/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5038c90bc1cfe5b1368d1f82535db95cc452fbb7 --- /dev/null +++ b/models/hak/vocab.txt @@ -0,0 +1,43 @@ +| +n +- +h +i +k +g +t +u +e +c +s +o +ì +a +y +l +m +â +p +ê +ó +ṳ +û +î +à +ò +̍ +á +f +v +ô +ú +ù +è +é +í +` +́ +̀ +̂ +' + diff --git a/models/hap/G_100000.pth b/models/hap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c039e8cf36f38feda9bf121901577a65c53de13 --- /dev/null +++ b/models/hap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45e1837af41a391d092ffc81e8f7c3e8f7d1ad5bb8c342a1b9b9ce7ce6cb3ff +size 145483735 diff --git a/models/hap/config.json b/models/hap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hap/vocab.txt b/models/hap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..15fe5cffd95bf125421c7ee3603c974d661419fa --- /dev/null +++ b/models/hap/vocab.txt @@ -0,0 +1,37 @@ +' +t +h +p +2 +8 +4 +a +i +b +m +n +- +d +z + +e +9 +5 +g +c +1 +f +7 +j +u +s +r +l +o +3 +k +y +0 +6 +w +_ diff --git a/models/hat/G_100000.pth b/models/hat/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc8ec8d5dcf35b41d00d8d073e42add888b2fa54 --- /dev/null +++ b/models/hat/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ca0a62f2cf7a37605026c0644fb469570d21e390d5ad3a7293af0672600d42 +size 145479901 diff --git a/models/hat/config.json b/models/hat/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hat/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hat/vocab.txt b/models/hat/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c4d9fba7e16a0840691c3134b8c0a6863b29b1ec --- /dev/null +++ b/models/hat/vocab.txt @@ -0,0 +1,32 @@ +| +n +a +e +o +i +t +l +p +u +y +s +m +k +è +r +d +v +b +w +f +j +ò +g +z +' +h +c +— +à +- + diff --git a/models/hau/G_100000.pth b/models/hau/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..06a7ef2fc0ee9d2ce7a652b4c7cf748f133022d4 --- /dev/null +++ b/models/hau/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a783ab673eda35ea4f1da94a75a221f694a78acd4ff522d0929ad57953cd30f1 +size 145481577 diff --git a/models/hau/config.json b/models/hau/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hau/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hau/vocab.txt b/models/hau/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..82df4abd54f42dd3774e901047d566f7e3b353fd --- /dev/null +++ b/models/hau/vocab.txt @@ -0,0 +1,34 @@ +d +_ +ɗ +s +g +t +o +y +r +a +u +ā +i +ū +c +n +j +b +ɓ +l +- +6 +e +k +f +ă +' +w +h +z +m +ƙ +ˈ + diff --git a/models/hay/G_100000.pth b/models/hay/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..544178f883e829f1babdbf99f7d974c7b15ac2cb --- /dev/null +++ b/models/hay/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebcc3e91089ee3a5aa81c2726ed815d9854687e95886b47057f2f264efb9870a +size 145482991 diff --git a/models/hay/config.json b/models/hay/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hay/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hay/vocab.txt b/models/hay/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a1e714d5e97ec54c851e8b0a9bed1e5c85aec89a --- /dev/null +++ b/models/hay/vocab.txt @@ -0,0 +1,36 @@ +a +| +i +u +n +o +e +b +k +m +r +g +w +t +y +h +l +s +' +z +d +j +f +p +c +v +0 +1 +2 +4 +3 +5 +8 +6 +7 + diff --git a/models/heb/G_100000.pth b/models/heb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..110e5039b61990eebfe4ce3ec6f7ca1c9ab2472c --- /dev/null +++ b/models/heb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abc5d8ab54ad39286e48c8016698d8a533e1792808e55624bf3bdce066a23ebe +size 145479939 diff --git a/models/heb/config.json b/models/heb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/heb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/heb/vocab.txt b/models/heb/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..d96bb64a23b1fb113d4604078873207dab3787d2 --- /dev/null +++ b/models/heb/vocab.txt @@ -0,0 +1,32 @@ +| +ו +י +א +ה +ל +ת +ש +ב +ם +ר +מ +נ +כ +ע +ד +ח +ק +ן +פ +ג +ס +צ +ז +ך +ט +ף +ץ +— +' +- + diff --git a/models/heh/G_100000.pth b/models/heh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..67612cacc2c022e001d43d1af85b62c6ca8a8912 --- /dev/null +++ b/models/heh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea954e8012ddf3ecf2ded64d9ed7b0526bc8feabea39fea994d8b9e29c685ebb +size 145476069 diff --git a/models/heh/config.json b/models/heh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/heh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/heh/vocab.txt b/models/heh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7d1a31a1c5d79dc6458965c662cf8e8358281e56 --- /dev/null +++ b/models/heh/vocab.txt @@ -0,0 +1,27 @@ +| +a +i +u +e +n +l +w +k +m +v +g +o +y +s +t +p +h +d +b +f +c +' +z +j +- + diff --git a/models/hif/G_100000.pth b/models/hif/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..854d4ff550fb3a091f5a213c7ff58db1cd9f1c2b --- /dev/null +++ b/models/hif/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb75600e3651b48070b964ae07a2737e2665fb1795f4b679371c4c760ec1ff9 +size 145486169 diff --git a/models/hif/config.json b/models/hif/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hif/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hif/vocab.txt b/models/hif/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a172408551abfd50201f7bc924a41a217b6134f7 --- /dev/null +++ b/models/hif/vocab.txt @@ -0,0 +1,40 @@ +| +a +e +h +i +k +r +u +s +m +n +o +l +b +t +g +p +j +d +y +c +w +ñ +ṭ +ṛ +f +ḍ +z +v +- +' +q +x +0 +4 +1 +6 +– +2 + diff --git a/models/hig/G_100000.pth b/models/hig/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c44cd299228b1fc64452af4b98e0d29233f84f7d --- /dev/null +++ b/models/hig/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f357a98b21a1de720d81f17288a03fc5f982ff1dc9a959b04eeb21775f3a5850 +size 145483009 diff --git a/models/hig/config.json b/models/hig/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hig/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hig/vocab.txt b/models/hig/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ac603bb1f445184cca837a924bbc43a7374889eb --- /dev/null +++ b/models/hig/vocab.txt @@ -0,0 +1,36 @@ +| +ə +a +m +i +n +t +w +k +y +l +s +h +r +g +á +b +d +u +z +e +j +v +́ +- +p +f +c +ɗ +ɓ +— +' +ú +o +í + diff --git a/models/hil/G_100000.pth b/models/hil/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a0b2d0e33d11ee059f258a93d9f9ba970bfd98c --- /dev/null +++ b/models/hil/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f40d9db1e792746effd95f18e9066708283d6e797aa7c9ea9a725a0a5952e6 +size 145487615 diff --git a/models/hil/config.json b/models/hil/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hil/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hil/vocab.txt b/models/hil/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8b05f41b21c64949d59dbee29db979e81dc255d2 --- /dev/null +++ b/models/hil/vocab.txt @@ -0,0 +1,42 @@ +a +| +n +g +i +s +o +k +m +l +t +u +p +d +y +b +h +r +w +e +- +j +c +0 +f +v +z +q +— +1 +2 +4 +5 +3 +x +9 +6 +7 +' +8 +ñ + diff --git a/models/hin/G_100000.pth b/models/hin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..154702dfcec0fc671ddaa9c9962814cdecf86717 --- /dev/null +++ b/models/hin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1d5e47edd7368ff40ff5673ddfc606ea713e785420d26c2da396b555458d3b +size 145510619 diff --git a/models/hin/config.json b/models/hin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hin/vocab.txt b/models/hin/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d13a34fe8465b49a513728567e18b8dc3ee8ebc --- /dev/null +++ b/models/hin/vocab.txt @@ -0,0 +1,72 @@ +फ +4 +1 +- +अ +इ +8 + +0 +छ +न +ए +ऐ +़ +ष +ि +ँ +च +2 +ऑ +थ +भ +ी +‍ +ॅ +3 +ञ +ै +ु +ठ +ं +ॉ +उ +_ +ई +ः +ह +ध +ल +र +स +ब +ख +ण +' +` +व +घ +प +ग +ढ +य +े +् +ा +आ +ड +ज +झ +श +औ +ो +द +ृ +ौ +ऊ +ू +ओ +ट +त +क +म diff --git a/models/hlb/G_100000.pth b/models/hlb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..89e5bc725375df6644b71cb10059f1370c2289b5 --- /dev/null +++ b/models/hlb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09126a16492da0bb5c933d81744dc0be895a179105324efb807c64b2e80d14ae +size 145494509 diff --git a/models/hlb/config.json b/models/hlb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hlb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hlb/vocab.txt b/models/hlb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..30b96237411c440dd4b62eff36c177579097a13b --- /dev/null +++ b/models/hlb/vocab.txt @@ -0,0 +1,51 @@ +ड +़ +_ +ल +ु +ज +ि +स +ई +उ +फ +अ +थ +घ +झ +े +' +व +आ +य +न +ए +त +ू +ट +ख +- +क +ो +ध +ढ +ं +र +ब +म +प +ऊ +ग +ठ + +ा +च +इ +छ +ह +द +ओ +‍ +भ +ी +् diff --git a/models/hlt/G_100000.pth b/models/hlt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5807fb28ecc6f04d151f3b1d080aac0dce6bad98 --- /dev/null +++ b/models/hlt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ea83703c3dc562eab9ccea9a3db4fcf6be6f479cbdaf96be234f80ce8b5e98a +size 145476853 diff --git a/models/hlt/config.json b/models/hlt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hlt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hlt/vocab.txt b/models/hlt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4f6a1207358848fc01a964e66b278c04cb782436 --- /dev/null +++ b/models/hlt/vocab.txt @@ -0,0 +1,28 @@ +r +y +u +o +m +v +g +i +h +s +n +e +d +l +w +z + +_ +k +' +- +t +c +f +a +j +b +p diff --git a/models/hne/G_100000.pth b/models/hne/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb499309f800934f5405a4fdb55edaee7b2639a8 --- /dev/null +++ b/models/hne/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52c8a11f56a4b65cbfd5f42559f3130133650cfb0fc6b0212f531d6fc2ea8f9 +size 145502173 diff --git a/models/hne/config.json b/models/hne/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hne/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hne/vocab.txt b/models/hne/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8542c0b9c55a9c8a4785844cc72a4fdd0b793268 --- /dev/null +++ b/models/hne/vocab.txt @@ -0,0 +1,61 @@ +| +ा +र +े +क +न +म +ह +स +ि +ल +त +ं +ब +प +य +ी +व +ो +अ +ज +द +ु +ऊ +ओ +ू +ग +ख +ए +् +च +इ +थ +आ +भ +ई +ड +ध +घ +़ +ट +‍ +छ +ठ +झ +उ +- +फ +ढ +ौ +ै +ः +– +' +6 +4 +0 +5 +3 +ृ + diff --git a/models/hnn/G_100000.pth b/models/hnn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b84380608c6c248cb0f765a8b5866bfc5c866ef4 --- /dev/null +++ b/models/hnn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca56187aeae6d52a286e278d05d2b77faaf717c6e368b352e33c8990da879f6a +size 145471567 diff --git a/models/hnn/config.json b/models/hnn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hnn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hnn/vocab.txt b/models/hnn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fb55d824eee124740da569acaa9494b353bc11d6 --- /dev/null +++ b/models/hnn/vocab.txt @@ -0,0 +1,21 @@ +- +p +s +d +h +i + +b +m +u +a +r +g +_ +' +y +w +k +l +n +t diff --git a/models/hns/G_100000.pth b/models/hns/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2dbf5ab0fc65ab35bbf040e4c9bc8c444e250074 --- /dev/null +++ b/models/hns/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3c0807b0cbc2a421a1bed71be6ee3f7bf5f6afe1709c88f699b6056c3da5b0 +size 145478399 diff --git a/models/hns/config.json b/models/hns/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hns/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hns/vocab.txt b/models/hns/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86cdfd81e6c28676646f0e23bfaa09f7a1ec73f3 --- /dev/null +++ b/models/hns/vocab.txt @@ -0,0 +1,30 @@ +a +| +e +i +k +h +o +r +t +s +b +l +n +d +m +j +g +p +w +u +- +f +' +y +é +ó +á +z +í + diff --git a/models/hoc/G_100000.pth b/models/hoc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..358c5849f5e103c4838d7252cd5edf3be6c0452c --- /dev/null +++ b/models/hoc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a4d18a87304a24cc6e074f36256094ba492220837618be03dd4d487f1bae7f +size 145497579 diff --git a/models/hoc/config.json b/models/hoc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hoc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hoc/vocab.txt b/models/hoc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4ead993bd21b3785a2aac96bd155e348207cc587 --- /dev/null +++ b/models/hoc/vocab.txt @@ -0,0 +1,55 @@ +ଟ +ଢ +ସ +ି +ଫ + +ମ +' +ଥ +- +୍ +ୃ +ୌ +ୈ +ନ +ତ +ୟ +ଞ +େ +– +ଃ +କ +ହ +ଅ +ଶ +ଡ +ଂ +ଖ +ଇ +ଣ +ଙ +ଉ +ଭ +‍ +ଁ +ୀ +— +ୋ +ର +_ +ଦ +ୱ +i +ୁ +ଆ +ଚ +ା +ବ +ପ +ଳ +ଲ +ଏ +ଗ +ଜ +4 diff --git a/models/hoy/G_100000.pth b/models/hoy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0e1e6405b3f2b702db53604b359a0f30d9bda5a --- /dev/null +++ b/models/hoy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36d3bd227bd8c2ec635d57ddcc2916e1b9fecb0dd4bbb304830c1dac629032d +size 145502313 diff --git a/models/hoy/config.json b/models/hoy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hoy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hoy/vocab.txt b/models/hoy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8b89cef23634d48ae3b6ca0336b2aa7c1accee7e --- /dev/null +++ b/models/hoy/vocab.txt @@ -0,0 +1,61 @@ +| +ा +द +र +न +ु +क +ी +् +त +स +य +ि +ळ +म +आ +ो +े +ं +अ +ग +ल +व +ह +प +ट +ब +ई +श +ऊ +ज +च +इ +उ +‍ +ू +भ +ड +ध +ख +ढ +ॉ +घ +ण +ै +फ +ौ +थ +- +ठ +ष +ओ +छ +औ +झ +ए +़ +ृ +ँ +' + diff --git a/models/hto/G_100000.pth b/models/hto/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ddc36965de567d0e16f0ae94e2f0af9fcc7d1d00 --- /dev/null +++ b/models/hto/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18af7ea4f1e59d9f6538f712c30413f35ff3396ec5d5e144638f7ffee6658b36 +size 145485289 diff --git a/models/hto/config.json b/models/hto/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hto/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hto/vocab.txt b/models/hto/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..140990cbbde79ca4e83cf494a64d236656fd14db --- /dev/null +++ b/models/hto/vocab.txt @@ -0,0 +1,39 @@ +| +a +ù +e +i +o +n +m +u +d +c +l +r +f +j +í +t +á +z +ó +ñ +b +é +à +g +s +ú +p +h +v +ü +k +q +' +y +` +x +- + diff --git a/models/hub/G_100000.pth b/models/hub/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..55f1a9e7310fd7ff2f75ba76317ea6dcf5b34a2b --- /dev/null +++ b/models/hub/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71973e94486a5f0990754db81d063caba2095c5d872ef7c28a3f9a88f9eab26b +size 145482233 diff --git a/models/hub/config.json b/models/hub/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hub/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hub/vocab.txt b/models/hub/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7b5459a5309e2f59b1bedc0638ce7931876ab083 --- /dev/null +++ b/models/hub/vocab.txt @@ -0,0 +1,35 @@ +a +| +i +u +n +t +k +m +s +r +e +h +c +j +w +p +y +g +o +l +ú +d +í +b +á +é +v +f +ó +z +' +q +x +ñ + diff --git a/models/hui/G_100000.pth b/models/hui/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c89398124b71d1cd86b454a428586d657cccaca --- /dev/null +++ b/models/hui/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3ff0dd79e846a3636a780a413f667d36f4c6803646008d4b8cdfee479ad728 +size 145476067 diff --git a/models/hui/config.json b/models/hui/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hui/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hui/vocab.txt b/models/hui/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a5559d9a0429b00bada1c1879e686ded97cc01a6 --- /dev/null +++ b/models/hui/vocab.txt @@ -0,0 +1,27 @@ +a +| +i +n +o +e +g +b +l +u +h +m +d +r +w +t +y +k +p +s +í +̱ +̠ +á +- +é + diff --git a/models/hun/G_100000.pth b/models/hun/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e3fbfc7ff411712f8a31a4bb65e9f0507416474 --- /dev/null +++ b/models/hun/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1115c735b24ba961d8ac0035fcd226d6ec6398dbf601139a974b9e743cdb032 +size 145483652 diff --git a/models/hun/config.json b/models/hun/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hun/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hun/vocab.txt b/models/hun/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..2fb4545b7d92e0110034e011c6a2976f6d4413a3 --- /dev/null +++ b/models/hun/vocab.txt @@ -0,0 +1,37 @@ +r +t +l +ó +- +e +k +i +y +í + +b +d +é +h +z +p +n +ö +a +j +ő +u +ü +o +v +c +g +_ +á +ú +x +s +m +ű +f +– diff --git a/models/hus-dialect_centralveracruz/G_100000.pth b/models/hus-dialect_centralveracruz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..23d949fcec24fac95be5a4b97bf1ed74cd7a82b4 --- /dev/null +++ b/models/hus-dialect_centralveracruz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e63ff17a01450df4eb6f8f6d3588921cf0fb1b2452f739a608b3f494475be06 +size 145478403 diff --git a/models/hus-dialect_centralveracruz/config.json b/models/hus-dialect_centralveracruz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/hus-dialect_centralveracruz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hus-dialect_centralveracruz/vocab.txt b/models/hus-dialect_centralveracruz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..91c9391e43d07c5e38280f522b6f4e51ba137bdb --- /dev/null +++ b/models/hus-dialect_centralveracruz/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +t +n +k +l +j +e +s +b +h +o +u +x +c +w +m +y +d +p +r +— +g +f +á +ó +ß +œ + diff --git a/models/hus-dialect_westernpotosino/G_100000.pth b/models/hus-dialect_westernpotosino/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..aeb1e8aeb65ff3a5daa0785977c455c4a49c2fc9 --- /dev/null +++ b/models/hus-dialect_westernpotosino/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd74924ffa67a9c658ef194a74d16f46580e5daeed898231e8c7f9b904582012 +size 145489907 diff --git a/models/hus-dialect_westernpotosino/config.json b/models/hus-dialect_westernpotosino/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hus-dialect_westernpotosino/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hus-dialect_westernpotosino/vocab.txt b/models/hus-dialect_westernpotosino/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e67edc0c640d80eb20994ba96fd929848a244916 --- /dev/null +++ b/models/hus-dialect_westernpotosino/vocab.txt @@ -0,0 +1,45 @@ +4 +é +b +' +á +ñ +_ +j + +k +í +6 +ó +f +d +v +ú +r +t +7 +e +u +a +2 +w +0 +3 +p +h +s +g +9 +n +5 +à +i +x +1 +z +- +o +m +y +c +l diff --git a/models/huu/G_100000.pth b/models/huu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e704977f4f0f336744903f49efeabe555f6ffdd0 --- /dev/null +++ b/models/huu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a530e463e9068d4948db69516622588fcd5ebe4980c31ae515ed52542914b01 +size 145483863 diff --git a/models/huu/config.json b/models/huu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/huu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/huu/vocab.txt b/models/huu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3c2ace5c06fa76c389591c6be0cef5f244c638b6 --- /dev/null +++ b/models/huu/vocab.txt @@ -0,0 +1,37 @@ +ñ +r +a +_ +z +g +u +n +q +c +ɨ +k +í +p +h +b +x + +j +— +é +i +f +t +d +- +ó +o +e +v +y +ú +' +s +l +m +á diff --git a/models/huv/G_100000.pth b/models/huv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd3495ca4e4c12d2a3a338b56fcaeffce2546496 --- /dev/null +++ b/models/huv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b1bca5821df437a0e67e0d204b146a50c96b463a703de1a59b35fcb26fb082a +size 145483771 diff --git a/models/huv/config.json b/models/huv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/huv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/huv/vocab.txt b/models/huv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d809cf1a486d4b8744aa5e5a6d07f1b4661411e --- /dev/null +++ b/models/huv/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +i +e +t +ü +m +j +o +w +c +l +s +g +y +p +d +u +r +x +q +b +h +á +í +ú +ó +é +f +ǘ +v +z +ñ +k +1 + diff --git a/models/hvn/G_100000.pth b/models/hvn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..efb681f68d6963d9d958a579fea99e43bad939fd --- /dev/null +++ b/models/hvn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:202e9a47ff501499be33d4b52c51cf87dbe929b9a798c874ba8f9029ed7f710a +size 145476065 diff --git a/models/hvn/config.json b/models/hvn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hvn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hvn/vocab.txt b/models/hvn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..25ab9864721cf6c387b8056c5f7c9f770d4134b7 --- /dev/null +++ b/models/hvn/vocab.txt @@ -0,0 +1,27 @@ +e +a +s +' + +g +d +r +ó +o +j +f +w +p +_ +l +- +u +n +è +i +b +h +m +k +t +y diff --git a/models/hwc/G_100000.pth b/models/hwc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..340044f5be50ff422a6091aa94203d60c2af67dd --- /dev/null +++ b/models/hwc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e5d5b857008144555c8ffaaa7a908850063fc51a02f4ccf66ec65429ccd241a +size 145484623 diff --git a/models/hwc/config.json b/models/hwc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hwc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hwc/vocab.txt b/models/hwc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b07dba3f802b5d762098009c7b6c53bc3dc7994f --- /dev/null +++ b/models/hwc/vocab.txt @@ -0,0 +1,38 @@ +5 +j +r +z +w +s +4 +_ +h +n +t +i +o +b +m +c +x +k +q +e +- +l +f +d +y +a +— +' +0 +p +v +6 + +u +g +1 +7 +2 diff --git a/models/hyw/G_100000.pth b/models/hyw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..71d832f6efb3478dfcdf8afe9614153116bf4864 --- /dev/null +++ b/models/hyw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0cf03c1de2a10cb3cf47a63b7473278b514f9721b3b6d27521ed98d1cfd261f +size 145486855 diff --git a/models/hyw/config.json b/models/hyw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/hyw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/hyw/vocab.txt b/models/hyw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d77241923fcc8908fa005706f83600af1ab46842 --- /dev/null +++ b/models/hyw/vocab.txt @@ -0,0 +1,41 @@ +է +ր +ծ +հ +զ +ֆ +_ +ղ +կ +գ +դ +ւ +ի +պ +օ +ս +ք +չ +ն +ա +ճ +լ +ռ +վ +թ +ձ +տ +խ +բ +շ +ո +փ +' +մ +ց + +ը +ե +յ +ժ +ջ diff --git a/models/iba/G_100000.pth b/models/iba/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d7cd1ba481c2199e2d3007407bce97b56eb856c --- /dev/null +++ b/models/iba/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623132dc2ae45e82cea196de546821b2161f7752ebc7459f163da5ef917cde60 +size 145477629 diff --git a/models/iba/config.json b/models/iba/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/iba/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/iba/vocab.txt b/models/iba/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e376906bf3c1f6699e2a1fe81381461f5a0f7e91 --- /dev/null +++ b/models/iba/vocab.txt @@ -0,0 +1,29 @@ +a +| +i +n +u +e +k +g +t +l +d +s +m +r +b +h +y +p +j +o +w +c +- +v +0 +3 +5 +2 + diff --git a/models/icr/G_100000.pth b/models/icr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..41c9fee893acbc8f3b547c16e7d4bbba53558550 --- /dev/null +++ b/models/icr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4da79ce9c3956eb492e03175c333d173867b867930a0d7fb7f06650fa7e3cfb +size 145482241 diff --git a/models/icr/config.json b/models/icr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/icr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/icr/vocab.txt b/models/icr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c8df2f81bac20d51aa0917d75493b6852a4ef837 --- /dev/null +++ b/models/icr/vocab.txt @@ -0,0 +1,35 @@ +n +x +p +s +q +k +w +l +f +e +2 +0 +1 +m +z +c +- +o +u + +i +' +a +h +t +b +v +9 +r +j +— +_ +d +g +y diff --git a/models/idd/G_100000.pth b/models/idd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..997d80420aecafbd346e29aa7e1ad43f397a04fc --- /dev/null +++ b/models/idd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cc54e7ba0143f52f0af68beea237b73cd2463de71aa0f00837d3a72e5840245 +size 145486057 diff --git a/models/idd/config.json b/models/idd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/idd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/idd/vocab.txt b/models/idd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d22e0dbf95fae66c6f1bfd248e92a3f843de57ef --- /dev/null +++ b/models/idd/vocab.txt @@ -0,0 +1,40 @@ +p +ì +é +ḿ +d + +è +n +í +r +o +ɔ +ǹ +l +ò +ɛ +u +ú +ù +ó +́ +j +m +k +s +w +i +g +_ +a +h +' +y +à +f +b +e +t +̀ +á diff --git a/models/ifa/G_100000.pth b/models/ifa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..815babaf7a3c7d61762227ab98eb9dafadfb711a --- /dev/null +++ b/models/ifa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b2794c564d41bb60010dc7952aad259534850502bd3286984c2d50ff783a84 +size 145477601 diff --git a/models/ifa/config.json b/models/ifa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ifa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ifa/vocab.txt b/models/ifa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..53f481d284d5c1741fb82af0c1e90b0fa6dda8a5 --- /dev/null +++ b/models/ifa/vocab.txt @@ -0,0 +1,29 @@ +h +n +t +- +g +v +e +x +w +s +l +c +i +' +f +z +j +m +o +a +y + +p +b +k +u +d +r +_ diff --git a/models/ifb/G_100000.pth b/models/ifb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..35450cc706b7e569bc8cc62ab22f43c077f8a865 --- /dev/null +++ b/models/ifb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a3dded74c4053c426e5605025e2525bd484abdab3a9945b3939310f2ec4f24 +size 145478405 diff --git a/models/ifb/config.json b/models/ifb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ifb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ifb/vocab.txt b/models/ifb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..eb9ebc62095f361e4984f891a7c9c8f76a5abbf2 --- /dev/null +++ b/models/ifb/vocab.txt @@ -0,0 +1,30 @@ +| +a +n +i +u +d +y +h +t +m +o +g +l +p +' +b +e +s +w +k +r +j +c +v +z +f +x +- +q + diff --git a/models/ife/G_100000.pth b/models/ife/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..14dc6b7cc3ffc8378f08c21e10ea568e3e24fc93 --- /dev/null +++ b/models/ife/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcacbfce445cfea32badb0c868385eb05bb6a7f7eb125cde553997dd3ffa44d4 +size 145493077 diff --git a/models/ife/config.json b/models/ife/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ife/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ife/vocab.txt b/models/ife/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..84820c532e1bdc7f63fa3d645231bb66dd4a400c --- /dev/null +++ b/models/ife/vocab.txt @@ -0,0 +1,49 @@ +y +o +s +z +ǹ +d +t +e +̃ +ɖ +ĩ +ì +á +h +é +a +́ +m +w +ó +_ +ŋ +f +ń +à +ã +ɔ +g +l +ũ +̀ +b +ò +k +í +u +ú +r +ɛ +ḿ +ṹ + +è +p +‐ +n +- +ù +i diff --git a/models/ifk/G_100000.pth b/models/ifk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6373489895c6a542bbe8310bf3e9431d0ed70a0d --- /dev/null +++ b/models/ifk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dde2c8f363b3c1c007d3b7d64c7a0bf60d6a1a9e0857653ce5005743ee44428 +size 145481465 diff --git a/models/ifk/config.json b/models/ifk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ifk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ifk/vocab.txt b/models/ifk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9d1122f3f96ffaf07533df45d35a4ef746b6762e --- /dev/null +++ b/models/ifk/vocab.txt @@ -0,0 +1,34 @@ +| +a +n +i +u +d +t +y +h +o +m +k +g +l +e +p +b +s +- +w +r +j +c +` +' +f +v +z +x +q +1 +9 +7 + diff --git a/models/ifu/G_100000.pth b/models/ifu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9b33befbaabfa91410fd5943f29e14df9836b14 --- /dev/null +++ b/models/ifu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c436e1a42d0991f3db27c877e5e4a63ae8e6ce78baaf12d1c36927002bf5b648 +size 145478383 diff --git a/models/ifu/config.json b/models/ifu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ifu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ifu/vocab.txt b/models/ifu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0b8701c33ca0fb5e4bbc724ec2516507bd83a5b2 --- /dev/null +++ b/models/ifu/vocab.txt @@ -0,0 +1,30 @@ +| +a +n +h +i +u +o +t +m +p +c +j +g +y +l +k +e +- +s +d +r +f +' +w +b +v +z +x +á + diff --git a/models/ify/G_100000.pth b/models/ify/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a70c8fa60566a62c741acd4388951da73b1f5e5 --- /dev/null +++ b/models/ify/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:918e5bb7b28cd5b4cf839e03853f0aff77855a12efded0528e632af0c8792cbf +size 145478359 diff --git a/models/ify/config.json b/models/ify/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ify/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ify/vocab.txt b/models/ify/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..040678057ebe77ea271bfd180c34c6b009ede1f8 --- /dev/null +++ b/models/ify/vocab.txt @@ -0,0 +1,30 @@ +| +a +n +e +i +u +d +t +g +m +h +y +l +k +p +- +s +b +w +o +j +r +c +' +v +f +z +x +q + diff --git a/models/ign/G_100000.pth b/models/ign/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0fe7134126636f424ceec1f5641328a5c8ce9da3 --- /dev/null +++ b/models/ign/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8868ac5ef23002643ce91d66c849dbe4a4049965e54f55af2b8f0cdfd24ab372 +size 145488371 diff --git a/models/ign/config.json b/models/ign/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ign/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ign/vocab.txt b/models/ign/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cb1014c85910a108e6775378e9470cb4d8d2afba --- /dev/null +++ b/models/ign/vocab.txt @@ -0,0 +1,43 @@ +a +| +i +e +t +n +r +' +u +k +m +p +s +c +w +h +á +y +j +v +é +í +o +ú +l +ñ +d +b +– +g +f +ó +z +q +- +x +0 +1 +4 +9 +2 +3 + diff --git a/models/ikk/G_100000.pth b/models/ikk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..123a693127fd82fe60962e0e151394abfc20ee36 --- /dev/null +++ b/models/ikk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fefc190bcf98a8ee000d88f3a62df459fedd1effd99ab58386f7ec09c951510c +size 145483004 diff --git a/models/ikk/config.json b/models/ikk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ikk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ikk/vocab.txt b/models/ikk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4998fe9142e1483a74bcf67953a52480d69d3516 --- /dev/null +++ b/models/ikk/vocab.txt @@ -0,0 +1,36 @@ +r +t +' +a +h +k +n +ụ +- +b +ị + +i +w +ù +̀ +m +o +e +c +s +y +_ +z +l +ò +v +g +d +ẹ +u +f +p +ṅ +j +ọ diff --git a/models/ilb/G_100000.pth b/models/ilb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bcd19cf472a28b0b16fa8581f89757325152e95 --- /dev/null +++ b/models/ilb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02108610a9cebff42574f68714967a4991f095b028cf205c02a02201510031aa +size 145476839 diff --git a/models/ilb/config.json b/models/ilb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ilb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ilb/vocab.txt b/models/ilb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8d212c3ed539cf3aa0ffbc4d11e8341c0c748f8e --- /dev/null +++ b/models/ilb/vocab.txt @@ -0,0 +1,28 @@ +m +ŋ +l +e +z + +n +c +u +' +w +s +d +f +i +y +p +h +- +j +a +t +b +_ +o +k +v +g diff --git a/models/ilo/G_100000.pth b/models/ilo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..683b7cda109a4f1cd9edaa6363d566a78621c4ab --- /dev/null +++ b/models/ilo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa35d3fe19f5a6962699cfc16bb9c32c68ea90f7026707b7604290ad01c78e11 +size 145486813 diff --git a/models/ilo/config.json b/models/ilo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ilo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ilo/vocab.txt b/models/ilo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0bad933fc219169e649ceb09c2be977209273b4e --- /dev/null +++ b/models/ilo/vocab.txt @@ -0,0 +1,41 @@ +a +| +i +n +t +g +k +d +o +s +m +e +u +p +y +l +r +b +w +j +- +c +h +f +— +v +z +0 +' +2 +1 +4 +x +3 +5 +6 +8 +7 +9 +q + diff --git a/models/imo/G_100000.pth b/models/imo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..81dc30d37c9cb6a335086aee6f2ad50eb6f08715 --- /dev/null +++ b/models/imo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9030f4770763229521b84611a8b82fb759e5cc2bf102d3848835533464871170 +size 145478387 diff --git a/models/imo/config.json b/models/imo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/imo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/imo/vocab.txt b/models/imo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fd75762861b2f580d7f528c4543aeae22ceef04b --- /dev/null +++ b/models/imo/vocab.txt @@ -0,0 +1,30 @@ +| +i +n +o +a +e +u +m +l +k +p +g +t +b +y +r +d +w +s +j +0 +f +v +h +1 +2 +5 +4 +' + diff --git a/models/inb/G_100000.pth b/models/inb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a8874d3640b639e2a999eaeda97b7800cdd4368 --- /dev/null +++ b/models/inb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec5bd7c56b2b3136e56f9132d397ce83874d190c12718e547b4d4da2b2664fb +size 145482982 diff --git a/models/inb/config.json b/models/inb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/inb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/inb/vocab.txt b/models/inb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77d1e4d5b563e02a52f0d2e9f6cc222edf94c4bd --- /dev/null +++ b/models/inb/vocab.txt @@ -0,0 +1,36 @@ +a +| +i +­ +u +k +n +s +p +m +r +t +l +c +h +g +w +d +ñ +j +e +b +o +— +ú +á +í +é +f +z +v +ó +̈ +' +x + diff --git a/models/ind/G_100000.pth b/models/ind/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..db62a954f900be775d70adc46f434c48d9dcaa35 --- /dev/null +++ b/models/ind/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68055387728e98461cb6dcd83e7e07ea08ba293f81b37d616c3219058e62d53e +size 145482203 diff --git a/models/ind/config.json b/models/ind/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ind/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ind/vocab.txt b/models/ind/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..edf2720f81d6f6ca72ba49611f19a5e2f1cc34ee --- /dev/null +++ b/models/ind/vocab.txt @@ -0,0 +1,35 @@ +g +c +o + +— +' +t +n +i +_ +r +d +a +p +- +j +v +l +f +y +w +u +6 +s +h +k +z +0 +1 +5 +4 +e +b +2 +m diff --git a/models/iou/G_100000.pth b/models/iou/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..25513884ffd75eb42123b145586f9346014f930e --- /dev/null +++ b/models/iou/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb5c7616f2f54fcefd0fcb5e6ce61ce5719e1be20aab962d4418df7cd99456e +size 145484527 diff --git a/models/iou/config.json b/models/iou/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/iou/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/iou/vocab.txt b/models/iou/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3269414e0c3c3b9a95c149259bebd24bb2430a35 --- /dev/null +++ b/models/iou/vocab.txt @@ -0,0 +1,38 @@ +9 +1 +p +4 +e +i +n +5 +l +s +8 +f +ŋ +j +b +h +r + +a +g +3 +ä +0 +t +o +v +- +k +y +' +m +d +w +6 +u +2 +_ +7 diff --git a/models/ipi/G_100000.pth b/models/ipi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3cf907294e1734d879794c1d2a3575a391b39f20 --- /dev/null +++ b/models/ipi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee97f1c5c80db0bd0fdf9df0f809049ac45eff94a7eaf2e508d84fae9db8c7ba +size 145473799 diff --git a/models/ipi/config.json b/models/ipi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ipi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ipi/vocab.txt b/models/ipi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..49cef3ee6b9e6e86437fb13eb8825b50074c998e --- /dev/null +++ b/models/ipi/vocab.txt @@ -0,0 +1,24 @@ +a +| +e +n +o +i +t +l +p +k +m +u +y +d +w +b +- +s +g +j +ŋ +0 +2 + diff --git a/models/iqw/G_100000.pth b/models/iqw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d29bab6a5dfcb527fb828b453960e1cdb900e674 --- /dev/null +++ b/models/iqw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e5b3b062a465a9f0529c80d6e895a79eb9a43456ba4266dfc4610a66fed37b +size 145490677 diff --git a/models/iqw/config.json b/models/iqw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/iqw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/iqw/vocab.txt b/models/iqw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cdd2ee0d9924573a68ce22245c155f9805f28d84 --- /dev/null +++ b/models/iqw/vocab.txt @@ -0,0 +1,46 @@ +ù +ị +u +j +p +ó +t +' +ú +ǹ +c +g +d +h +ẹ +o +̀ +s +ọ +y +á +ì +b + +ụ +- +r +l +é +n +a +z +w +í +v +m +6 +è +ò +f +i +́ +e +k +à +_ diff --git a/models/iri/G_100000.pth b/models/iri/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4350a1a8e219f721ce58a549bac4810bbe0ab4c --- /dev/null +++ b/models/iri/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97155f1bfcabc3225d14841b8239f9ea2ba772555cf54c44a51800dfdecdccf8 +size 145488351 diff --git a/models/iri/config.json b/models/iri/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/iri/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/iri/vocab.txt b/models/iri/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..022c9a309fdd85ab7cf0d2ad7a577ba5731841a3 --- /dev/null +++ b/models/iri/vocab.txt @@ -0,0 +1,43 @@ +̱ +s +z +p +y +- +ē +l +u +í +ê +n +t +d +f +ú +ā +a +ī +w +o +m + +g +î +_ +v +ó +â +è +á +3 +i +b +h +j +6 +e +é +' +k +c +r diff --git a/models/irk/G_100000.pth b/models/irk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..eef4df1dde53b101d3d8351705fb12deddfd40ed --- /dev/null +++ b/models/irk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96367e31612a49fa7d592a01953f024267c130a0e3c08a62d9c1e8edde4b1aac +size 145480795 diff --git a/models/irk/config.json b/models/irk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/irk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/irk/vocab.txt b/models/irk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4e2674fd24bc2f95753a4e3df0e0ab761539842c --- /dev/null +++ b/models/irk/vocab.txt @@ -0,0 +1,33 @@ +| +a +i +e +r +n +u +o +s +á +m +h +l +t +g +k +w +' +d +í +y +ó +ú +b +é +q +x +f +p +c +j +z + diff --git a/models/isl/G_100000.pth b/models/isl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c165508c8a228f7e8ffc4028cb799d6faed6e57 --- /dev/null +++ b/models/isl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff4f6795b7c3897f6b9d7a77166697ea8ad00bb8bfb7ebb3c76864b1f53ee61 +size 145488499 diff --git a/models/isl/config.json b/models/isl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/isl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/isl/vocab.txt b/models/isl/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..310686c121c51f6bd2bce975d4f31e3b81f544cf --- /dev/null +++ b/models/isl/vocab.txt @@ -0,0 +1,43 @@ +5 +– + +f +ð +l +k +ó +o +a +_ +e +ú +á +3 +g +m +2 +u +p +é +æ +x +v +0 +- +7 +r +s +i +n +d +ý +1 +í +y +8 +b +h +t +þ +j +ö diff --git a/models/itl/G_100000.pth b/models/itl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3dc37b37da6375d73e6d77027107a693e2e6e50 --- /dev/null +++ b/models/itl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203d8fae6b5354972212292ac30b95a9fdfe02560c3819853324a10dc84ce621 +size 145492187 diff --git a/models/itl/config.json b/models/itl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/itl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/itl/vocab.txt b/models/itl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ed7ba7b53b05374dd4ea3db125665216d79798a6 --- /dev/null +++ b/models/itl/vocab.txt @@ -0,0 +1,48 @@ +э +ч +_ +ӑ +х +ы +и +ӈ +ӄ +ŏ +ɂ +– +р +м +љ +ў +с +ԓ +б +я +й +у + +ь +о +л +н +а +щ +ж +ӽ +2 +в +0 +к +ш +п +т +ʼ +ф +3 +г +з +1 +д +ә +њ +ц diff --git a/models/itv/G_100000.pth b/models/itv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5938ecbbd3467043d3b536adf3a6fceff8e985a1 --- /dev/null +++ b/models/itv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e4fde77b373934410e90f856a484635a843521a94eef0ada8bc9defc82c2af +size 145478391 diff --git a/models/itv/config.json b/models/itv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/itv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/itv/vocab.txt b/models/itv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ec3f9a8c646ca3abdacd7d3a60fcd1e2516a08a9 --- /dev/null +++ b/models/itv/vocab.txt @@ -0,0 +1,30 @@ +a +| +n +i +g +k +u +y +t +m +e +r +l +s +p +d +o +b +w +f +h +z +j +v +- +c +' +x +q + diff --git a/models/ixl-dialect_sangasparchajul/G_100000.pth b/models/ixl-dialect_sangasparchajul/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9820565783a00bf5be00cd4d625da53f31321759 --- /dev/null +++ b/models/ixl-dialect_sangasparchajul/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8524a1608b9048c3ab436aaadbfc71c006195b898f1d75e3a6e9fd49166868ff +size 145480675 diff --git a/models/ixl-dialect_sangasparchajul/config.json b/models/ixl-dialect_sangasparchajul/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ixl-dialect_sangasparchajul/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ixl-dialect_sangasparchajul/vocab.txt b/models/ixl-dialect_sangasparchajul/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3eb816d4d39f477be3bd39ff43501bfeae5fbb4e --- /dev/null +++ b/models/ixl-dialect_sangasparchajul/vocab.txt @@ -0,0 +1,33 @@ +v +u +d +x +s +b +i +r +6 +h +e +q +p +g +f +k +3 +4 +0 +l +- +a +z +c +ʼ +y +j + +m +t +_ +o +n diff --git a/models/ixl-dialect_sanjuancotzal/G_100000.pth b/models/ixl-dialect_sanjuancotzal/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..88625c1515ec5c62f03e7b4c132cdf8a906beef7 --- /dev/null +++ b/models/ixl-dialect_sanjuancotzal/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1458589bc65b2beca5d56c66eb20c044c74a098a22dc9e286b668241c94ccd85 +size 145487583 diff --git a/models/ixl-dialect_sanjuancotzal/config.json b/models/ixl-dialect_sanjuancotzal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ixl-dialect_sanjuancotzal/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ixl-dialect_sanjuancotzal/vocab.txt b/models/ixl-dialect_sanjuancotzal/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cc15bdb20053d6fcc6cb54d8b68e6b521d21c544 --- /dev/null +++ b/models/ixl-dialect_sanjuancotzal/vocab.txt @@ -0,0 +1,42 @@ +| +a +' +t +i +u +e +l +n +o +h +k +s +x +z +c +j +m +q +b +v +y +p +r +— +d +g +0 +f +1 +4 +– +2 +3 +5 +6 +9 +7 +8 +ñ +- + diff --git a/models/ixl-dialect_santamarianebaj/G_100000.pth b/models/ixl-dialect_santamarianebaj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad749109f2abd0d09d812d001f16f1f9e7e51441 --- /dev/null +++ b/models/ixl-dialect_santamarianebaj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87c5b66d7213bbddd0c911ad721220b93b46d94a7ea7d7e261b996c413bcde8e +size 145487837 diff --git a/models/ixl-dialect_santamarianebaj/config.json b/models/ixl-dialect_santamarianebaj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ixl-dialect_santamarianebaj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ixl-dialect_santamarianebaj/vocab.txt b/models/ixl-dialect_santamarianebaj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2be6eb230d333077439a24ad6ff68df929b2af04 --- /dev/null +++ b/models/ixl-dialect_santamarianebaj/vocab.txt @@ -0,0 +1,42 @@ +| +a +' +t +e +u +i +l +n +k +s +v +h +o +x +j +b +c +q +m +z +y +p +r +— +ú +d +g +í +é +á +ó +f +– +0 +1 +- +4 +ñ +5 +3 + diff --git a/models/izr/G_100000.pth b/models/izr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0174bac434b99d3903cfa41d1c87203f88a438de --- /dev/null +++ b/models/izr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f4895468ec63e9f252074fc4f0786ced34f6a5a62e15af77ef8155f45c9d7b +size 145482989 diff --git a/models/izr/config.json b/models/izr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/izr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/izr/vocab.txt b/models/izr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b1c87aa4b7bc3f26750c8dd2b3e788727d929bc1 --- /dev/null +++ b/models/izr/vocab.txt @@ -0,0 +1,36 @@ +| +a +n +i +k +e +y +r +t +u +s +o +m +g +b +w +f +d +h +z +' +p +l +c +v +0 +j +1 +4 +ŕ +2 +— +ٔ +- +ł + diff --git a/models/izz/G_100000.pth b/models/izz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..490dc4305154c12feab563026abbf7ed3715fd8c --- /dev/null +++ b/models/izz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ecaa49c5b4f7ab0531eb632e60e81ca1b48293505aac508f062a4092f40f80 +size 145492221 diff --git a/models/izz/config.json b/models/izz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/izz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/izz/vocab.txt b/models/izz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a54f729db1b8d9204bc13a0fe7d8f4811daa01b6 --- /dev/null +++ b/models/izz/vocab.txt @@ -0,0 +1,48 @@ +ú +è +f +y +s +ẹ +k +ḿ +ị +d +e +́ +j +_ +é +v +ó +p +z + +ń +ụ +í +ọ +t +l +̀ +u +i +á +w +c +r +ò +h +ù +o +ǹ +à +ì +n +' +- +b +m +a +6 +g diff --git a/models/jac/G_100000.pth b/models/jac/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc99e940f1d78aa8c55fa21f24f3cefdcbef20f8 --- /dev/null +++ b/models/jac/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5223bdebd0d908d6dee57eec5f21aedab2bf6dcb71ed69f2cb98dc0e2bdb30b0 +size 145480681 diff --git a/models/jac/config.json b/models/jac/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jac/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jac/vocab.txt b/models/jac/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..568a97166602950b943e6179fc6140dfb83c5610 --- /dev/null +++ b/models/jac/vocab.txt @@ -0,0 +1,33 @@ +| +a +i +n +c +t +e +o +h +y +' +l +u +m +j +s +b +x +̈ +w +k +z +p +r +d +ẍ +- +q +g +f +v +` + diff --git a/models/jam/G_100000.pth b/models/jam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d58238e1fa2e88308891cf8ca0e062ff132a0c95 --- /dev/null +++ b/models/jam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df3c6249dac6f51f10d5a494d72bb25585cdcd83f09dcae6442093143520250c +size 145477717 diff --git a/models/jam/config.json b/models/jam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jam/vocab.txt b/models/jam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..85323e9eda8e22c7e6fda65d44a5771d06fda0d7 --- /dev/null +++ b/models/jam/vocab.txt @@ -0,0 +1,29 @@ +| +i +a +n +d +e +u +m +o +s +t +w +l +k +p +g +r +f +b +h +v +y +z +j +c +— +- +' + diff --git a/models/jav/G_100000.pth b/models/jav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f631518ed50ce2fd5f72da2f8d3de5e3cf51d7a3 --- /dev/null +++ b/models/jav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e34c7c52eefb3ad03aafebd43c794f3333fd39484aecf93f44326816da5d5794 +size 145475569 diff --git a/models/jav/config.json b/models/jav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jav/vocab.txt b/models/jav/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..458b746b96e70268ade598d84c5668f74d318748 --- /dev/null +++ b/models/jav/vocab.txt @@ -0,0 +1,26 @@ +a +| +n +g +i +e +k +u +s +r +t +p +m +l +d +h +o +w +b +y +j +- +c +f +z + diff --git a/models/jbu/G_100000.pth b/models/jbu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e65653466e9b0eb5fc2757f4b4e67d39c9606fd6 --- /dev/null +++ b/models/jbu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695a6c91211970f5b8c51e181772922e72b2d8c43d9c3f734d038207a2299c09 +size 145492221 diff --git a/models/jbu/config.json b/models/jbu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/jbu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jbu/vocab.txt b/models/jbu/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..a4a268d8d3db7e01dee89575dd21ec2d99dd93fd --- /dev/null +++ b/models/jbu/vocab.txt @@ -0,0 +1,48 @@ +| +a +n +i +u +b +k +e +d +w +y +o +r +s +m +t +c +j +z +p +h +f +g +à +v +­ +' +í +l +ī +ò +á +ú +ā +ù +è +ì +é +ō +ē +- +̀ +ó +ḿ +ū +q +â + diff --git a/models/jen/G_100000.pth b/models/jen/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3fcf5f1ebbd0f9e0e8a725872f646ca9816d34c6 --- /dev/null +++ b/models/jen/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75348d2c8d0c405a3beebdca6ee1d7a74f256b82277a5490e74103f53a00e0ce +size 145483001 diff --git a/models/jen/config.json b/models/jen/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jen/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jen/vocab.txt b/models/jen/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c0afe138623030773f10c23b578c1caf29775677 --- /dev/null +++ b/models/jen/vocab.txt @@ -0,0 +1,36 @@ +8 +m +ɨ +s +y +t +i +c +r +k +f +_ +z +e +ɛ +̃ +p + +v +b +ã +j +h +g +u +d +l +w +ũ +ɔ +a +ĩ +' +n +o +ə diff --git a/models/jic/G_100000.pth b/models/jic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b774c0b49ccd8d77e5bb998bea8faef1fdd032c8 --- /dev/null +++ b/models/jic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de8c75006c21b4134060e2991eddfe5142c1dda6145cfa37087c382663c9ec6 +size 145488487 diff --git a/models/jic/config.json b/models/jic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jic/vocab.txt b/models/jic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..70d8cc0f1cf6dc6e772b6c3f5f2e2cd11f2c0fcc --- /dev/null +++ b/models/jic/vocab.txt @@ -0,0 +1,43 @@ +| +a +j +n +s +p +l +i +e +t +u +o +m +c +y +ü +ꞌ +w +á +' +d +v +q +é +r +í +ó +ú +̈ +g +b +— +ǘ +z +f +h +ñ +x +0 +1 +2 +k + diff --git a/models/jiv/G_100000.pth b/models/jiv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b758243a4b45db78f882e1af4e52f6f84da49e0b --- /dev/null +++ b/models/jiv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655fa1748f79075059f1521af7ed7fba7820822680182d10405d6e6c1f6e3c60 +size 145475183 diff --git a/models/jiv/config.json b/models/jiv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jiv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jiv/vocab.txt b/models/jiv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..52ec18dc835f2cb66f111552d8f7a44e67d2de0c --- /dev/null +++ b/models/jiv/vocab.txt @@ -0,0 +1,26 @@ +a +| +i +u +n +t +r +s +m +k +h +e +̱ +j +y +á +c +w +p +í +ṉ +ú +é +ð +- + diff --git a/models/jmc/G_100000.pth b/models/jmc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ad1f40d5eaf268000430a8d907b93fa18eb8e67b --- /dev/null +++ b/models/jmc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153445b1f325883aae2e7e111f0b4cb8e4745ac68594542c66643e24047eea90 +size 145480665 diff --git a/models/jmc/config.json b/models/jmc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jmc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jmc/vocab.txt b/models/jmc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..650db881a13f93e5d9c408e2c4986fcb7fcc8a7b --- /dev/null +++ b/models/jmc/vocab.txt @@ -0,0 +1,33 @@ +a +| +i +n +e +y +k +u +o +m +s +l +w +v +r +d +h +f +t +b +g +- +p +' +z +0 +4 +j +1 +6 +3 +2 + diff --git a/models/jmd/G_100000.pth b/models/jmd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9982429d96fb90074951c91ee3b71c7f3869a652 --- /dev/null +++ b/models/jmd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa943b26a498c3cb5133e023e15849107318ebd17c173de3a9de05d1ce2ed8ac +size 145476101 diff --git a/models/jmd/config.json b/models/jmd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jmd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jmd/vocab.txt b/models/jmd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4778f923a690658ce61e24ff42f66273fbd4d351 --- /dev/null +++ b/models/jmd/vocab.txt @@ -0,0 +1,27 @@ +o +s +k +i +t +d +p +- +_ +c +w +a +' +u +b +h +g +r +e +j +l +n +z +y + +f +m diff --git a/models/jun/G_100000.pth b/models/jun/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..996342e32f9076c4ea6e33ffc9138ae44118353f --- /dev/null +++ b/models/jun/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e3de50f8f61882bf8b735a40e2daa8a2ffcb5f585d5fe85d6f90e08139a564 +size 145500679 diff --git a/models/jun/config.json b/models/jun/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jun/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jun/vocab.txt b/models/jun/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5d4342b9a689954fe08c304897983d3a3dbbbc78 --- /dev/null +++ b/models/jun/vocab.txt @@ -0,0 +1,59 @@ +ଙ +ଖ +୯ +ୃ +ଁ +ଠ +୩ +ଧ +ଥ +ୟ +ନ +ଚ +ଫ +ଏ +ଂ +ପ +ଓ +ଲ +୫ +ଜ +ଉ +‍ +ଅ +୪ +ୈ +ବ +ଳ +ଟ +ଶ +ର +୬ +ତ +ଞ +ସ +ଷ +୨ +଼ +ି +ମ +' +ୁ + +୦ +- +୧ +୍ +େ +_ +ଗ +ଦ +ହ +ଯ +କ +ଣ +ଇ +ଡ +ା +ୋ +ଆ diff --git a/models/juy/G_100000.pth b/models/juy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ae316dad9a8807b1dea6a845f39e1f425cc89a0 --- /dev/null +++ b/models/juy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be57b32fbb2755b294752d24d93fb60cb3c74c06c464e0dd8bbe86e6cfbc1dbb +size 145485281 diff --git a/models/juy/config.json b/models/juy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/juy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/juy/vocab.txt b/models/juy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6960b9bec8705dc0f3e126d87d4e085539b4b680 --- /dev/null +++ b/models/juy/vocab.txt @@ -0,0 +1,39 @@ +୍ +| +ନ +ି +େ +ଆ +ଡ +ା +ର +ଜ +ଙ +ତ +ମ +ଲ +ବ +ସ +ଞ +ୟ +ୋ +ୁ +ଅ +କ +ଗ +ପ +ଏ +଼ +ଇ +ୱ +ଣ +ଃ +ଦ +ଓ +ଉ +ଁ +ଟ +ଂ +' +‍ + diff --git a/models/jvn/G_100000.pth b/models/jvn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..424244a1617a76df965474fb7e466bb588b7a520 --- /dev/null +++ b/models/jvn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93b34e9dc8e15201b2e71d27de9e8636f0fd064d15f754f50c4b8316e118eb72 +size 145482239 diff --git a/models/jvn/config.json b/models/jvn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/jvn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/jvn/vocab.txt b/models/jvn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..78d3485eb6181e6d22158d9cfa50ea4d7e72377b --- /dev/null +++ b/models/jvn/vocab.txt @@ -0,0 +1,35 @@ +| +a +n +g +k +i +u +s +é +r +t +o +l +e +w +d +m +p +b +è +y +h +j +- +f +v +0 +' +z +8 +1 +2 +5 +6 + diff --git a/models/kaa/G_100000.pth b/models/kaa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..65afcc210a750939816085fc2c0821fa5709e92b --- /dev/null +++ b/models/kaa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5050b1401c7d05007494987a07144b50006ce6b678f45dd6ae94658bf83c2acc +size 145491409 diff --git a/models/kaa/config.json b/models/kaa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kaa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kaa/vocab.txt b/models/kaa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..18384f1b36dbbee4adaade2379ee0fde1cb858f3 --- /dev/null +++ b/models/kaa/vocab.txt @@ -0,0 +1,47 @@ +| +а +е +и +ы +н +л +р +д +с +т +м +б +й +о +ќ +п +у +з +к +ш +њ +ў +ѓ +ж +є +г +ҳ +ү +қ +х +ң +μ +ғ +я +– +ѳ +ә +ө +в +- +ь +ф +ю +э +ц + diff --git a/models/kab/G_100000.pth b/models/kab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ff51c2f0ca97ff1c8b0526bbff7b4662525e113 --- /dev/null +++ b/models/kab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec5e114e9d601f55a2c33623e36dbc9b6663963185e24f52adf47042d133ce43 +size 145484511 diff --git a/models/kab/config.json b/models/kab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kab/vocab.txt b/models/kab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5848416cec6922387a63adf3dc83a7f483f1c27a --- /dev/null +++ b/models/kab/vocab.txt @@ -0,0 +1,38 @@ +| +a +e +i +n +d +t +l +s +m +r +u +- +y +w +k +ɣ +b +g +ṛ +ɛ +f +q +ḥ +c +z +x +ṭ +ḍ +h +ṣ +ǧ +č +ẓ +j +' +‐ + diff --git a/models/kac/G_100000.pth b/models/kac/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b9f60e499a19a315842fda3e0c9eb9c726a8e2a --- /dev/null +++ b/models/kac/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b1cd97d3a5e17adf3f50ca9703faa0e79a593d545f8bf5fefcb8aa7a807400 +size 145475281 diff --git a/models/kac/config.json b/models/kac/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kac/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kac/vocab.txt b/models/kac/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0aca77876186ff4ce00e9b3f83d03537852cf09a --- /dev/null +++ b/models/kac/vocab.txt @@ -0,0 +1,26 @@ +| +a +n +i +h +g +m +u +t +e +s +w +k +r +d +y +l +p +j +b +o +c +z +- +' + diff --git a/models/kak/G_100000.pth b/models/kak/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43580e15868f82331577f2d90133e6e3fa51b2e5 --- /dev/null +++ b/models/kak/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bd142cae471db035a171d104268dfb421fa71202481c486a5c44b33283eca0 +size 145481453 diff --git a/models/kak/config.json b/models/kak/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kak/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kak/vocab.txt b/models/kak/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..caf6b64c84c09df35de27f3a40f1c73cebd0b3a2 --- /dev/null +++ b/models/kak/vocab.txt @@ -0,0 +1,34 @@ +x +a +v +k +b +_ +c +o +y +r +q +' +p +i +e +d +l +t +ñ +â +w +6 +g + +j +m +h +z +1 +f +u +s +- +n diff --git a/models/kan/G_100000.pth b/models/kan/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..34dfc4f6734d1bc02dbf02f9d29f9297400f4e69 --- /dev/null +++ b/models/kan/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0734f8bdade8a250d608b0ce2abcd862a719971ce15eca9211b7dca73cc02125 +size 145513045 diff --git a/models/kan/config.json b/models/kan/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kan/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kan/vocab.txt b/models/kan/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..4cf3f10b9032d0b37c1d699646f5a88b81da7526 --- /dev/null +++ b/models/kan/vocab.txt @@ -0,0 +1,75 @@ +' +ಓ +ಮ +ಆ +ಭ +ಋ +ಚ +ಟ +ು +ೇ +ಜ +ಏ +ಾ +ಪ +ಶ +್ +ಈ +ಔ +ಞ +1 +ನ +ಎ +ಸ +ವ +ಇ +ಘ +5 +ಊ +ಳ +ಖ +ೃ +ೆ +- +ತ +ಝ +ಢ + +ಅ +ಧ +ೌ +ಹ +ಯ +9 +ಫ +ಛ +8 +6 +ಲ +ಣ +_ +ದ +ಬ +ಠ +ಡ +ಃ +3 +ೋ +ಂ +ಒ +ೀ +ಕ +ಿ +ಥ +ರ +ೂ +ಐ +ೈ +ಉ +2 +0 +ಗ +ೊ +ಷ +4 +7 diff --git a/models/kao/G_100000.pth b/models/kao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..abe7d9f4af7b9412c41155c6b79071e93c5aceac --- /dev/null +++ b/models/kao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8356d28fbd269a85984ff7bbdf6d479b7d5bf8c41f7c16530640116c05a5e2cf +size 145479895 diff --git a/models/kao/config.json b/models/kao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kao/vocab.txt b/models/kao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4515cd955f2f734e9ae951874c79c062dfa8f71a --- /dev/null +++ b/models/kao/vocab.txt @@ -0,0 +1,32 @@ +| +a +n +o +i +l +e +u +x +t +m +b +s +k +d +w +r +f +y +ɲ +' +g +ŋ +j +í +ń +h +p +é +c +­ + diff --git a/models/kaq/G_100000.pth b/models/kaq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b9ba1eb807976d7d0a034245db622276f91b33b --- /dev/null +++ b/models/kaq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1771ed0ff565199829b29a23d2d66a4f8985ee1af9984734b6aaa7e6964551 +size 145481441 diff --git a/models/kaq/config.json b/models/kaq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kaq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kaq/vocab.txt b/models/kaq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4b9d0c0ed9404b25139c5f6506a7eb60fc99078 --- /dev/null +++ b/models/kaq/vocab.txt @@ -0,0 +1,34 @@ +x +ú +l +g +k +p +t +ó +f +_ +u +b +— +n +v +c +j +z +d +q + +o +í +h +y +a +ñ +s +i +m +á +é +r +e diff --git a/models/kay/G_100000.pth b/models/kay/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..992d376a6d59d25adf1c180d80c14354fe3ba19b --- /dev/null +++ b/models/kay/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ddf4b1a9a7dad395e90f2a7eb3cd28b6c3ed1ce2e8158543136a2dc4e61284 +size 145490005 diff --git a/models/kay/config.json b/models/kay/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kay/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kay/vocab.txt b/models/kay/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7d4f587d158786b15f440027df59e5fa67fd2da2 --- /dev/null +++ b/models/kay/vocab.txt @@ -0,0 +1,45 @@ +ã +m +ỹ +l +z +f +ẽ +u +n +w +x +ç +ĩ +' +j +t +5 +s +6 +a +c +1 +9 +— +õ +ũ +e +_ +k +d +v +y +2 +p +0 +h +i +b +g +7 +3 +o +r +8 + diff --git a/models/kaz/G_100000.pth b/models/kaz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..63d969c231f16052ffb21e1d8d4404ad53d63e27 --- /dev/null +++ b/models/kaz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f4b132f78d295417dfa62f7ea0873595904e07c79448b73c2bf34b547c4d9f3 +size 145488255 diff --git a/models/kaz/config.json b/models/kaz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kaz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kaz/vocab.txt b/models/kaz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6dddc92cc4905f7b3be45d33f1c727287282f562 --- /dev/null +++ b/models/kaz/vocab.txt @@ -0,0 +1,43 @@ +р +т +ұ +б +ү +у +ь +- +ю +ы +ш +й +в +_ +к +л +ң +ц +а +м +з +х +ә +һ +щ +е +ж +п +д +— +ф +э +г +қ +і +ғ +и +о + +н +с +я +ө diff --git a/models/kbo/G_100000.pth b/models/kbo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c82407e07691937fa544799f2a9594575528853b --- /dev/null +++ b/models/kbo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38a0eafe48a8813061f3c005012fdbc051b31500117ee09b3446c54eab73e788 +size 145493759 diff --git a/models/kbo/config.json b/models/kbo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kbo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kbo/vocab.txt b/models/kbo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c54deae389d009f6406c2ce28beba272efcd90b7 --- /dev/null +++ b/models/kbo/vocab.txt @@ -0,0 +1,50 @@ +s +t +i +ẽ +̃ +ị +ụ +o +g +u +m +l +ô +a +ọ +â + +́ +h +̂ +í +k +á +ꞌ +d +b +é +ũ +j +ộ +z +ĩ +p +ŋ +î +n +õ +_ +ã +e +f +v +y +' +c +ẹ +w +ú +r +ó diff --git a/models/kbp/G_100000.pth b/models/kbp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..09a386efa700ff86bbf493a4da1691eaf9c01c50 --- /dev/null +++ b/models/kbp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc035082d7a3d98b754e75e66b91b2f34e185b08456a9b5e1c6d62f649a4e35f +size 145488393 diff --git a/models/kbp/config.json b/models/kbp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kbp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kbp/vocab.txt b/models/kbp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9a3f6c515c050ecf2caab4acbe3630d960885b7d --- /dev/null +++ b/models/kbp/vocab.txt @@ -0,0 +1,43 @@ + +f +ɖ +é +z +t +- +i +ó +b +ɛ +n +e +k +m +ḿ +c +́ +g +ñ +đ +ú +ʋ +v +h +p +u +o +ń +j +y +l +a +ɣ +ɔ +r +á +ɩ +_ +w +ŋ +s +d diff --git a/models/kbq/G_100000.pth b/models/kbq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b7b9c98f1fe527febda592138d170b401786be3 --- /dev/null +++ b/models/kbq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3546bd50eec802ec17de58b07d8fdd4e314af2dcb659bbedfb8c64c441062e8 +size 145485263 diff --git a/models/kbq/config.json b/models/kbq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kbq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kbq/vocab.txt b/models/kbq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..87ae524b301917c1ee61be92a87202a2443c7dbe --- /dev/null +++ b/models/kbq/vocab.txt @@ -0,0 +1,39 @@ +u +8 +0 +v +m +f + +l +7 +1 +2 +y +_ +4 +h +g +p +- +d +e +a +3 +5 +q +i +b +6 +t +z +o +' +k +w +s +9 +x +r +n +j diff --git a/models/kbr/G_100000.pth b/models/kbr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d91f2450dcc255cc28c465b398933832454740a --- /dev/null +++ b/models/kbr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4b72dea56bf5507bed91228e7e3c57eaafb073f95fdc386697afc23b82eab30 +size 145479905 diff --git a/models/kbr/config.json b/models/kbr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kbr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kbr/vocab.txt b/models/kbr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5a530570ed9fca04e54418d3dbd2664217e42771 --- /dev/null +++ b/models/kbr/vocab.txt @@ -0,0 +1,32 @@ +| +o +a +i +e +n +h +t +b +c +y +s +m +u +l +g +k +r +q +d +' +w +x +f +j +p +- +z +6 +1 +4 + diff --git a/models/kby/G_100000.pth b/models/kby/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5194a824018e47a605bee4983d1c40ef5e442ac3 --- /dev/null +++ b/models/kby/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0f7c5789ee9fde7abd3e8a4b68d797711d7e36883bb9abfc92beb7b67b9bd99 +size 145478389 diff --git a/models/kby/config.json b/models/kby/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kby/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kby/vocab.txt b/models/kby/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..de3ff4e8603af8f37b640d17fe0e1f1aae009aac --- /dev/null +++ b/models/kby/vocab.txt @@ -0,0 +1,30 @@ +w +t +h +s +n +j +k +p +y +f +l +e +o +_ +m +a +g +- +b +' +3 +u +4 +i +r +ǝ +ɍ +c +d + diff --git a/models/kca/G_100000.pth b/models/kca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f5403bdb47ec0ea14f72d27d2895a40dce384bc --- /dev/null +++ b/models/kca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bf57c099809f4c7991e9c113dfe4eac9d8f1bba4ca9e12b6982395e96cc08fc +size 145490653 diff --git a/models/kca/config.json b/models/kca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kca/vocab.txt b/models/kca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2b6be3f22fb80dfef2a5077a5d98e67616fae3d8 --- /dev/null +++ b/models/kca/vocab.txt @@ -0,0 +1,46 @@ +ц +– +' +ч +к +м +з +ӑ +ә +ԓ +с +п +ш +в +е +ă +ў +ԋ +т +ы +х +ŏ +р +ӈ + +- +г +_ +у +ԉ +ԏ +о +и +й +д +н +э +ф +ь +ё +ю +л +я +а +щ +б diff --git a/models/kcg/G_100000.pth b/models/kcg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8803e7945a0d6247cf7386dc57fbf46979947234 --- /dev/null +++ b/models/kcg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e375c6d64032c26990ba17cccfb9ef800bb3d6af2a5813d8b2d50018c555c17 +size 145481445 diff --git a/models/kcg/config.json b/models/kcg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kcg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kcg/vocab.txt b/models/kcg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..09936a28e32e8bf8bb1b54abad233a270acb4ad7 --- /dev/null +++ b/models/kcg/vocab.txt @@ -0,0 +1,34 @@ +á +_ +í +y +ó +a +v +̱ +o +w +z +n +g +k +d +j +i +s +t +ú +r +' +p +l +é +m +f + +h +b +u +e +- +c diff --git a/models/kdc/G_100000.pth b/models/kdc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0e7e2268197f7083182cd0d1373afb84a6f6a9ae --- /dev/null +++ b/models/kdc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6598d83d087495f3105347979c12741a266b17894268b8f263e983ee4054650d +size 145475291 diff --git a/models/kdc/config.json b/models/kdc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdc/vocab.txt b/models/kdc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86a8d34e0dcf24a9cc5d1b72ca491cd66cff2193 --- /dev/null +++ b/models/kdc/vocab.txt @@ -0,0 +1,26 @@ +l +f +c +m +s +p +e +o +i +u +v +h +a +' +z +d +g +w + +k +b +j +t +n +y +_ diff --git a/models/kde/G_100000.pth b/models/kde/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c65d3a2cbe2dab09b634c4ac32c6080aa139247 --- /dev/null +++ b/models/kde/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee98541e54462be3d9f01d9fbd5b6995773d461f29cc8afea093f0f361e0e077 +size 145480665 diff --git a/models/kde/config.json b/models/kde/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kde/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kde/vocab.txt b/models/kde/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..498a19a2b01b566d3e37f39d0abdd9b4b7414959 --- /dev/null +++ b/models/kde/vocab.txt @@ -0,0 +1,33 @@ +í +w +g +è +e +y +d +a +l + +u +_ +o +h +ó +k +j +á +s +i +ù +' +ú +b +p +- +t +m +ì +à +é +v +n diff --git a/models/kdh/G_100000.pth b/models/kdh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..aaacbd6e2dd85009910213cdadc41cdae22e31a9 --- /dev/null +++ b/models/kdh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651000cf32eb73718a34aa75615888a1fb31b8407e0250f832f5faaaa80ccd76 +size 145488363 diff --git a/models/kdh/config.json b/models/kdh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/kdh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdh/vocab.txt b/models/kdh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..87c37d9e7b6fdd8eb1669b18779ad75bcf5cb247 --- /dev/null +++ b/models/kdh/vocab.txt @@ -0,0 +1,43 @@ +| +ɩ +́ +a +ɛ +n +á +ɔ +b +s +m +w +d +l +ʊ +k +r +e +ɖ +t +y +g +í +i +z +o +- +é +ú +u +ŋ +v +ń +f +j +ó +c +ḿ +p +h +‐ +ÿ + diff --git a/models/kdi/G_100000.pth b/models/kdi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2cd103406e92b84311b827a1d14c73ee8c01c9bd --- /dev/null +++ b/models/kdi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49aaf496e3ee21a9942ddded90a96120e4249d2255d083090227f3d5a48e6b05 +size 145476089 diff --git a/models/kdi/config.json b/models/kdi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdi/vocab.txt b/models/kdi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..95748e8fab4c0d7d3aa2230a2ecdef99c5e054f9 --- /dev/null +++ b/models/kdi/vocab.txt @@ -0,0 +1,27 @@ +| +o +e +a +i +k +n +m +d +u +t +g +r +b +w +l +y +p +c +s +j +' +4 +3 +0 +- + diff --git a/models/kdj/G_100000.pth b/models/kdj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..312422092132a5ed14a7b99515ded2ce8128b365 --- /dev/null +++ b/models/kdj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd914aab2333cd0e067cbc0669e6911e5065040a3ec16973949cf38dc4d9f6b +size 145475311 diff --git a/models/kdj/config.json b/models/kdj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdj/vocab.txt b/models/kdj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3d0d77612e92df5fa86f4e214869d67cb0cf8949 --- /dev/null +++ b/models/kdj/vocab.txt @@ -0,0 +1,26 @@ +| +a +i +e +o +k +n +ŋ +u +t +r +l +y +s +m +p +d +b +w +c +j +g +– +- +' + diff --git a/models/kdl/G_100000.pth b/models/kdl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..aced4a84e0995f83cf3a60ecfaca6b8ae2141911 --- /dev/null +++ b/models/kdl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14a457d595c08bd44d55f6811a4d4b37a26fba9e558bd1be69425d272d95c34b +size 145480671 diff --git a/models/kdl/config.json b/models/kdl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdl/vocab.txt b/models/kdl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5b7193a1b8679fc91319887ba563471efde926ee --- /dev/null +++ b/models/kdl/vocab.txt @@ -0,0 +1,33 @@ +| +a +u +i +n +k +̱ +s +t +e +m +y +o +l +w +g +r +h +ɗ +b +p +v +c +d +z +ꞌ +ɓ +f +j +‐ +- +' + diff --git a/models/kdn/G_100000.pth b/models/kdn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a6880127c1017bd9fca588a97ddc8065af843213 --- /dev/null +++ b/models/kdn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eae59094bbede49f4b23985cb2eef8666a809e96f4509e26ba855e025755e14 +size 145477725 diff --git a/models/kdn/config.json b/models/kdn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdn/vocab.txt b/models/kdn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d022b7723190e7d5d304d5490ea1e137bb7c6193 --- /dev/null +++ b/models/kdn/vocab.txt @@ -0,0 +1,29 @@ +a +| +i +u +n +k +e +m +l +d +o +w +t +h +z +y +p +s +b +g +v +c +f +j +r +- +' +– + diff --git a/models/kdt/G_100000.pth b/models/kdt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..974e17742c15431f6fb4e8b0dc8fa969083f11f2 --- /dev/null +++ b/models/kdt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ad7349971eef640a5bfa3c0298456d9e795c3037851fe2a1cacc245c2dc4d3 +size 145502221 diff --git a/models/kdt/config.json b/models/kdt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kdt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kdt/vocab.txt b/models/kdt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fba93360a3aee98379d68172be09c88e369dcdbf --- /dev/null +++ b/models/kdt/vocab.txt @@ -0,0 +1,61 @@ +៉ +ី +ះ +ង +ថ +ភ +ដ +ផ +ឹ +ខ +ូ +- +ញ +យ +ិ +ណ +រ +អ +ឱ +ៅ +ឋ +ែ +ា +ឥ +ល +ម +ឌ + +ឡ +េ +ស +ំ +្ +ព +ហ +គ +ួ +័ +ទ +ៃ +ឃ +ឿ +ុ +ន +៊ +៍ +ក +វ +ឆ +ោ +ើ +ជ +ឈ +់ +_ +ច +ៀ +ធ +ឺ +ត +ប diff --git a/models/kek/G_100000.pth b/models/kek/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d9a2d7eb1d3b1be3e63102764a98e2844ca3756 --- /dev/null +++ b/models/kek/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8e5a1d3b167682de15892a9e02a39e5140224ed8ae33ac3c38e46eb6ad3826 +size 145483771 diff --git a/models/kek/config.json b/models/kek/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kek/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kek/vocab.txt b/models/kek/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ec1053e48d0ec10c33e29d013897fdc1a3a4f4f7 --- /dev/null +++ b/models/kek/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +u +c +l +n +' +e +t +̱ +b +x +o +h +k +r +j +s +q +m +y +d +p +z +— +- +ú +í +g +ó +é +f +á +v +ñ + diff --git a/models/ken/G_100000.pth b/models/ken/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ceab8624c5521908d3b93e51f9d4c949ae87691 --- /dev/null +++ b/models/ken/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1527830c4eb6825b738d5150fc64202d206b1dc42a7b2bbeda215948c0268e05 +size 145494493 diff --git a/models/ken/config.json b/models/ken/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ken/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ken/vocab.txt b/models/ken/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ed645458219c8cb20f8600307fedaf28f5133d16 --- /dev/null +++ b/models/ken/vocab.txt @@ -0,0 +1,51 @@ +| +ɛ +n +b +a +m +k +́ +ɔ +h +y +t +á +ɨ +i +ŋ +s +r +p +c +d +o +u +w +í +ʉ +̌ +g +e +f +ǎ +ǔ +ó +ú +̀ +l +ǒ +é +j +à +ě +ò +ǐ +- +ù +ì +' +è +v +ń + diff --git a/models/keo/G_100000.pth b/models/keo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fcddfab28fc0abbc48157ad744ab3815f6c3e388 --- /dev/null +++ b/models/keo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2e8b4b3a5334622188671fe9f27e5a62444177d195283124a33706e0189171 +size 145476842 diff --git a/models/keo/config.json b/models/keo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/keo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/keo/vocab.txt b/models/keo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8a2f396e0be32d82ee38af36c3dd37002ac19b95 --- /dev/null +++ b/models/keo/vocab.txt @@ -0,0 +1,28 @@ +| +a +i +u +o +k +n +e +l +r +d +t +g +y +b +ŋ +m +p +s +' +z +w +h +f +v +­ +— + diff --git a/models/ker/G_100000.pth b/models/ker/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..447f61e365d4b1347b97d7971a8843158b336fff --- /dev/null +++ b/models/ker/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82db2a236f3c85be86ec78295a98af38aff910d283fc36e3f596a1dc737f045 +size 145495261 diff --git a/models/ker/config.json b/models/ker/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ker/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ker/vocab.txt b/models/ker/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e800de3a36ce7f8993ce384fae4aa77bc8712a73 --- /dev/null +++ b/models/ker/vocab.txt @@ -0,0 +1,52 @@ +| +a +ə +i +e +ŋ +k +n +m +t +u +r +d +b +l +o +w +s +g +y +p +h +ɗ +j +c +f +- +z +á +ɓ +' +v +ã +ó +é +õ +ẽ +ú +̃ +– +1 +0 +2 +3 +4 +5 +6 +7 +í +8 +9 + diff --git a/models/key/G_100000.pth b/models/key/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fee1c7ef9fd02cffdeb1764fca1e89ac5371a456 --- /dev/null +++ b/models/key/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f14ec2386b6240d601d5bef2151a7e95089a51edb51db64edc127da9e23587 +size 145496073 diff --git a/models/key/config.json b/models/key/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/key/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/key/vocab.txt b/models/key/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..52304e5c1db7ff627716dcfb7a0105f0d75aa2ee --- /dev/null +++ b/models/key/vocab.txt @@ -0,0 +1,53 @@ +ఐ +ఙ +ీ +జ +శ +_ +త +క +చ +ె +వ +ఊ +స +‍ +ఁ +- +ష +ద +ప +గ +ఓ +ఈ +న +ఔ +ే +' +ల +ో +ం +ర +ఎ +బ +ఏ +ఆ +ు +ూ +ఫ +ౌ +ై +ఉ +ొ +ా +అ +య +హ +డ +ఇ +ఒ +మ +్ +ట + +ి diff --git a/models/kez/G_100000.pth b/models/kez/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..779358319724be6e4b03935bf5ed56b1afb92dee --- /dev/null +++ b/models/kez/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca3a4d080aa2dd0580cef89002ea9dc05b1406a760898258acf2fbde380c01c1 +size 145493852 diff --git a/models/kez/config.json b/models/kez/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kez/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kez/vocab.txt b/models/kez/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7dcaeb6e34d74878bbf68c4b1c351899faceb4c6 --- /dev/null +++ b/models/kez/vocab.txt @@ -0,0 +1,50 @@ +| +a +ɔ +n +ɛ +k +l +b +t +g +i +m +u +e +y +s +w +p +o +z +f +d +v +- +̀ +j +r +h +c +́ +à +á +' +x +é +ú +ù +ì +è +q +– +0 +6 +— +í +ǹ +5 +ɜ +ń + diff --git a/models/kfb/G_100000.pth b/models/kfb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..49bc764fd399fd31358135d94890c644288c739a --- /dev/null +++ b/models/kfb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51a8ac0a50d5eb4f1de6c53a5d38c77231f524e496fd70772da85a997cae320 +size 145493763 diff --git a/models/kfb/config.json b/models/kfb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kfb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kfb/vocab.txt b/models/kfb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3bc57f979803102a70cf5b63e47674f25b89b460 --- /dev/null +++ b/models/kfb/vocab.txt @@ -0,0 +1,50 @@ +| +ा +न +् +े +त +क +ी +र +द +ु +‍ +म +आ +ग +ं +ल +स +व +प +ो +ट +य +ि +इ +ड +ळ +ए +ब +ज +ओ +श +ू +ई +उ +च +ै +अ +ौ +ऊ +' +ृ +- +० +४ +१ +६ +२ +७ + diff --git a/models/kff-script_telugu/G_100000.pth b/models/kff-script_telugu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4224dab2ec9ece78cdba8fee411c87f79b4e8a32 --- /dev/null +++ b/models/kff-script_telugu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc450cb1793eca1d1aea38c64d5bb12b87dd36a3a8b8a2465afcc3fd237fb6c +size 145486847 diff --git a/models/kff-script_telugu/config.json b/models/kff-script_telugu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kff-script_telugu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kff-script_telugu/vocab.txt b/models/kff-script_telugu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc9214d7b43ca64c99bba4ba7c963d80c40031bd --- /dev/null +++ b/models/kff-script_telugu/vocab.txt @@ -0,0 +1,41 @@ +| +ి +న +్ +ు +త +ా +క +ర +ం +ె +మ +ద +స +ో +గ +ప +ల +ట +వ +డ +య +ే +బ +ొ +జ +ఓ +అ +ీ +ఇ +చ +ఆ +ూ +ఈ +ఒ +ఊ +ఏ +ఎ +ఉ +- + diff --git a/models/kfw/G_100000.pth b/models/kfw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2e9d8590e868d496eca00e5792b2fb70c4d61e5 --- /dev/null +++ b/models/kfw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bb735301b0d13697881ed61de8ae7607454d1aad1561a37860a4e3a9d2553e +size 145488375 diff --git a/models/kfw/config.json b/models/kfw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kfw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kfw/vocab.txt b/models/kfw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..15b0dd54bc3f8d1d97dadffee56811c5c3f31580 --- /dev/null +++ b/models/kfw/vocab.txt @@ -0,0 +1,43 @@ +t +_ +l +z +– +2 +x +­ +p +— +7 +i +6 +k +d +a +y +o +' +w +c +r +v +n +0 +9 +b +u +q +f +j +m +- +8 +5 +e +4 + +g +1 +h +3 +s diff --git a/models/kfx/G_100000.pth b/models/kfx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0eee016f5ad3ec5f3f0a9d3824bc0f9ba073cb8 --- /dev/null +++ b/models/kfx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec6950a0699a1edc737a3262f18793d070581487b0daab3f28b55fa8934ed23 +size 145505289 diff --git a/models/kfx/config.json b/models/kfx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kfx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kfx/vocab.txt b/models/kfx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..33edd777698e4539007be21ef1273ba377ce365d --- /dev/null +++ b/models/kfx/vocab.txt @@ -0,0 +1,65 @@ +| +ा +ै +र +ी +स +त +ह +क +न +ि +ब +् +े +ो +़ +ु +ं +म +ल +प +ज +ण +द +य +श +आ +ू +ौ +ग +ई +व +भ +ऐ +ँ +ध +च +ख +ड +इ +ऊ +घ +ए +ढ +ट +छ +थ +अ +फ +ठ +झ +ष +- +औ +उ +ऑ +ओ +ः +ञ +ृ +ऋ +' +– +‍ + diff --git a/models/khg/G_100000.pth b/models/khg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b0ffa2e31f9bff88792bf7cfc71d661e95f9639 --- /dev/null +++ b/models/khg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81f72182fccd67433f112a793bed142e08cf810c9bf2695e094b855fe4a6dd92 +size 145500751 diff --git a/models/khg/config.json b/models/khg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/khg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/khg/vocab.txt b/models/khg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ee7084a57a2474c49081ae9886549c6627506934 --- /dev/null +++ b/models/khg/vocab.txt @@ -0,0 +1,59 @@ +མ +_ +ྱ +ཀ +ཆ +ག +ད +ཨ +ཐ +ྟ +ཤ +ཙ +ྕ +ོ +ྫ +ི +ལ + +ཟ +ྙ +ཡ +ཏ +ཁ +ྤ +ྦ +ང +ྐ +ན +ྭ +ཪ +ུ +ྲ +ྷ +ྒ +ྗ +ཞ +ར +ཅ +ྣ +ླ +པ +ྨ +འ +ྡ +ཱ +ཕ +ཊ +བ +ཉ +ྩ +་ +ཚ +ེ +ས +ཇ +ྔ +ཛ +ཝ +ཧ diff --git a/models/khm/G_100000.pth b/models/khm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..758a81df35191a327c4f49dceb4f42302164d3e5 --- /dev/null +++ b/models/khm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3fdc19424c9b71f3412b1027c57e7fab53a0f739b85d2598225702453336525 +size 145512065 diff --git a/models/khm/config.json b/models/khm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/khm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/khm/vocab.txt b/models/khm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1dee5053bc85d9f1ea727781660472da1a42370e --- /dev/null +++ b/models/khm/vocab.txt @@ -0,0 +1,74 @@ +្ +យ +អ +ឆ +គ +ង +ើ +ធ +់ +ឃ +ឌ +ឥ +ប +ួ +ឈ +q +៏ +ផ +ិ + +ជ +ឧ +ល +ឱ +ទ +ូ +រ +៎ +ឭ +ា +ៀ +ឿ +េ +ំ +៍ +ត +_ +៉ +ក +ែ +ៅ +ៃ +ដ +វ +ៈ +ឬ +ឫ +ះ +ោ +ី +ុ +ឯ +ព +ឡ +ច +ឋ +ណ +1 +ហ +ន +ឹ +ស +ខ +័ +ញ +៊ +ឮ +៌ +ថ +ឺ +ឪ +- +ម +ភ diff --git a/models/khq/G_100000.pth b/models/khq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4dadac07bb60e551abdbc52d9fecef9eeed3068 --- /dev/null +++ b/models/khq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cc3e33514d66970116687c3254a3cca00d91a2e16bf7b81214029efd252c7f3 +size 145482991 diff --git a/models/khq/config.json b/models/khq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/khq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/khq/vocab.txt b/models/khq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d1eb387fa7fcd1cc15a594ab0eb6e4cd5dca639b --- /dev/null +++ b/models/khq/vocab.txt @@ -0,0 +1,36 @@ +n +k +– +i +d +' +g +b +z +c +ɲ +j +o +f +r +a +š +ã + +s +t +h +ẽ +l +ŋ +p +ĩ +e +ž +y +_ +- +õ +m +u +w diff --git a/models/kia/G_100000.pth b/models/kia/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fde4eb7539f096509a0e3bb396b650d601e06cf --- /dev/null +++ b/models/kia/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e4489879925167e55b71cbbc37a283ee77a34a1e16d18af053dd2d9b97fc4f +size 145490671 diff --git a/models/kia/config.json b/models/kia/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kia/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kia/vocab.txt b/models/kia/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f07a6a84df226e5c7f63846fac83db7082dcbed4 --- /dev/null +++ b/models/kia/vocab.txt @@ -0,0 +1,46 @@ +à +ɓ +d +f +m +b +z +c +ù +h +n +ɗ +p +á +ó +g +ŋ +ú +k +l +s +v +ò +ḛ +' +y +é +̰ + +u +ṵ +_ +r +ū +́ +ḭ +- +a +w +e +j +í +o +è +i +t diff --git a/models/kij/G_100000.pth b/models/kij/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8b824c268233307740162ca6aefdf4d314033661 --- /dev/null +++ b/models/kij/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25a9d0351c4cbd6b90479052044b4c67ae55c34e61b0a4bc032502b34df32ab +size 145482247 diff --git a/models/kij/config.json b/models/kij/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kij/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kij/vocab.txt b/models/kij/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..608f7650bd3d46f3ee7505d722851db8bb5fc756 --- /dev/null +++ b/models/kij/vocab.txt @@ -0,0 +1,35 @@ +i +0 +á +6 +w +y +d +t +é +k +g +s +e +- +_ +1 +p +n +8 +b +r +5 +2 +f + +l +v +o +' +a +` +m +ó +u +4 diff --git a/models/kik/G_100000.pth b/models/kik/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6fa40516b7ee04079baf19bb33930809cf3f585 --- /dev/null +++ b/models/kik/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c63cd08e68b36a4a85313de4d98062bf6dcf03a792526d09668280e261280ea +size 145483867 diff --git a/models/kik/config.json b/models/kik/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kik/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kik/vocab.txt b/models/kik/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6b3cc20445e5e9c6c89fa68c750ddb13b484cb3e --- /dev/null +++ b/models/kik/vocab.txt @@ -0,0 +1,37 @@ +ũ +t +5 +h +1 +w +a +4 +n +j +ʼ +o +v +i +ĩ +_ +2 +m +k +0 +c +s +e +q +- +p +y +u +g +' + +d +b +r +z +f +l diff --git a/models/kin/G_100000.pth b/models/kin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b1322276db5c8462512d59f302917d1a9dde8476 --- /dev/null +++ b/models/kin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1942ff6bb02f250d0db09a43a48ac159531ad3521e4ba8aada741e8ab4fb1ac +size 145489143 diff --git a/models/kin/config.json b/models/kin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kin/vocab.txt b/models/kin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d7e228036eeccf0f1db297e6dad2f19f1cf8ee18 --- /dev/null +++ b/models/kin/vocab.txt @@ -0,0 +1,44 @@ +| +a +i +u +e +n +b +r +m +o +y +k +w +g +t +s +h +z +d +' +f +c +j +v +p +l +7 +1 +- +2 +3 +4 +6 +9 +8 +5 +0 +ē +ō +ā +ī +` +ū + diff --git a/models/kir/G_100000.pth b/models/kir/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..56ee6d4665b168d91808eaed9fb14e8e1804e58b --- /dev/null +++ b/models/kir/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f09a01c9c449204e44ed69c27d46d3783343a1eae9dabd9e6130e31b744380 +size 145484400 diff --git a/models/kir/config.json b/models/kir/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kir/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kir/vocab.txt b/models/kir/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..56983f3df53a4b2f48a674c34069b890a41e5c44 --- /dev/null +++ b/models/kir/vocab.txt @@ -0,0 +1,38 @@ +ж +ц +ө +– +и +ы +ю +я +е +о +й +л +у +- + +ү +_ +ң +э +а +д +т +с +н +р +ш +к +з +х +ё +б +в +п +ч +ь +г +м +ф diff --git a/models/kjb/G_100000.pth b/models/kjb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..abe049842d7a02e57017944ebf8ece71434a16d3 --- /dev/null +++ b/models/kjb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5c93435d4e38a95b81d953bb51b2335219e5bdb25d26787ddba738ed7d8b6a +size 145492199 diff --git a/models/kjb/config.json b/models/kjb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kjb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kjb/vocab.txt b/models/kjb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fa4fe11c22c718a6147b43af31209073defba863 --- /dev/null +++ b/models/kjb/vocab.txt @@ -0,0 +1,48 @@ +| +a +i +n +t +c +l +e +y +o +' +j +u +b +x +k +m +h +s +w +p +z +r +d +ẍ +q +ú +g +— +í +é +á +f +- +ó +v +0 +1 +2 +4 +5 +3 +9 +6 +7 +8 +ñ + diff --git a/models/kje/G_100000.pth b/models/kje/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8a428a1dae8fd9f0fbc40d0018fb8f0ca718da93 --- /dev/null +++ b/models/kje/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c431d4b7dc9dd6cd780353be005dca034484248cdd678357c75b9cd328e8475 +size 145476059 diff --git a/models/kje/config.json b/models/kje/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kje/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kje/vocab.txt b/models/kje/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1caa4fab92b1ea4917d94c5a14ba8056c5ca75e9 --- /dev/null +++ b/models/kje/vocab.txt @@ -0,0 +1,27 @@ + +f +k +h +s +z +o +_ +r +i +a +g +c +b +e +j +- +w +d +u +n +m +y +t +l +' +p diff --git a/models/kjg/G_100000.pth b/models/kjg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b99cf029d45e4740a30b127322e5ebc2a50d1e97 --- /dev/null +++ b/models/kjg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb6d44ed079753a0b7affe5f1385a9c405de3953b86d9bec57f1911a82167dab +size 145481455 diff --git a/models/kjg/config.json b/models/kjg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kjg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kjg/vocab.txt b/models/kjg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50dfccc2c1880af0b647de64be7ffbfe5bd31a31 --- /dev/null +++ b/models/kjg/vocab.txt @@ -0,0 +1,34 @@ +| +a +n +' +h +g +o +m +y +r +ô +t +i +l +k +w +u +s +e +é +p +c +è +d +b +v +ñ +j +- +ê +f +2 +0 + diff --git a/models/kjh/G_100000.pth b/models/kjh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..96bfed7279d880c2a71d0c714987a10140db4099 --- /dev/null +++ b/models/kjh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:594bb447b275c39c6fb4ccc5f4f4adcfd2d790c4eb6b6f25aa7e4e5cf77a344a +size 145490767 diff --git a/models/kjh/config.json b/models/kjh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kjh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kjh/vocab.txt b/models/kjh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..02a9059cb1f0d524f3ebda0d3c79017a755b9018 --- /dev/null +++ b/models/kjh/vocab.txt @@ -0,0 +1,46 @@ +ч +0 +р +і +ь +ы +ц +з +а +_ +х +и +с +м +е +л +ф +к +ң +4 + +́ +т +э +б +ӧ +г +н +1 +о +ш +– +ҷ +й +я +у +ю +ё +д +в +- +п +ӱ +2 +6 +ғ diff --git a/models/kki/G_100000.pth b/models/kki/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fd2213b0fc87aa967292e524f05cccf1da52b4e --- /dev/null +++ b/models/kki/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c8f475ac152d03bb6de640bef6157b73dbe13635d2db606ea01a51d6e0272fe +size 145477587 diff --git a/models/kki/config.json b/models/kki/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kki/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kki/vocab.txt b/models/kki/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..296295e817e4fbf720aecc7c30863386ed340226 --- /dev/null +++ b/models/kki/vocab.txt @@ -0,0 +1,29 @@ +| +a +i +u +n +e +o +w +k +m +l +h +g +y +s +d +f +t +c +b +j +p +' +v +2 +q +- +1 + diff --git a/models/kkj/G_100000.pth b/models/kkj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d88bee73351ab4145fa8f3bea95ad7ebada63d9 --- /dev/null +++ b/models/kkj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8e3bf137b28a02f0aa09e1f9221f83a99eae268ea3d10954cabaf864fca1de +size 145486803 diff --git a/models/kkj/config.json b/models/kkj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kkj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kkj/vocab.txt b/models/kkj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2a758f3f809c5080fe6f10e7a6980d6f8afd6d16 --- /dev/null +++ b/models/kkj/vocab.txt @@ -0,0 +1,41 @@ +| +á +a +n +e +m +k +y +i +t +ó +ä +u +s +o +w +ñ +d +b +l +j +ë +g +p +à +ã +r +h +ï +ê +è +' +ù +î +ì +f +v +õ +1 +- + diff --git a/models/kle/G_100000.pth b/models/kle/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6406581c315aa065ef7d7b70d1d016b42fa4aa3f --- /dev/null +++ b/models/kle/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9d97671a57430526588acfcc283976662160959a4dd53e22688dc748f3930f +size 145503709 diff --git a/models/kle/config.json b/models/kle/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kle/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kle/vocab.txt b/models/kle/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..15a9187f4034c6d15f031cf99a0de5254bb31cd6 --- /dev/null +++ b/models/kle/vocab.txt @@ -0,0 +1,63 @@ +ू +ध +अ +् +उ +ऋ +े +स +च +ई +ष +ल +भ +ण +घ +ु +ए +ँ +ो +ह +व + +ङ +ज +- +ै +इ +य +ी +ञ +ड +' +ठ +ऊ +ट +ढ +ग +त +ः +न +फ +ा +छ +प +श +ौ +ख +क +‍ +आ +झ +ब +द +ृ +र +थ +ि +ं +_ +ऐ +म +ओ +– diff --git a/models/klu/G_100000.pth b/models/klu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e217bc3a347cedb7a8a0aa1a516522a65afd6969 --- /dev/null +++ b/models/klu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa7a467d109a0652f2373def2b161c74fc3623a60f5b0510c669271886818b9 +size 145493739 diff --git a/models/klu/config.json b/models/klu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/klu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/klu/vocab.txt b/models/klu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..243fa1a2c5e32239275b4ffd42df4d8298328cee --- /dev/null +++ b/models/klu/vocab.txt @@ -0,0 +1,50 @@ +| +̍ +n +a +e +ɛ +l +ɔ +o +i +k +‐ +t +y +b +s +u +m +h +j +w +p +̀ +d +â +è +ê +˖ +ô +ò +̂ +à +ì +î +f +ù +g +û +c +ˆ +ˈ +ǔ +- +ě +ǐ +ǎ +' +̌ +ǒ + diff --git a/models/klv/G_100000.pth b/models/klv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..df2715568f249a639a16404b9a8edcf0693a8aa3 --- /dev/null +++ b/models/klv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f83d33ebb164cd1a83a9775bf7b92eebb85f3f6598bd9a4d8cf7f8eda5de53 +size 145486805 diff --git a/models/klv/config.json b/models/klv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/klv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/klv/vocab.txt b/models/klv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..31e63b7a817a7d29fe76b13ee3f0d55aa53e39b5 --- /dev/null +++ b/models/klv/vocab.txt @@ -0,0 +1,41 @@ +w +s +v +_ +d +j +y +k +0 +b +o +p +̃ +' +— +1 +m +- +u +3 +a +g +ǝ +6 +2 +l +7 +e +4 +f +ŋ +ṽ +8 +n + +h +i +t +5 +r +9 diff --git a/models/klw/G_100000.pth b/models/klw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..684233c08b90c2448ec498e1df7a1a2e20ff550d --- /dev/null +++ b/models/klw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9451c8069886eb5dad0c44bec3e6e70fe46bdef96648956ad9bc28ea9841e9d8 +size 145476063 diff --git a/models/klw/config.json b/models/klw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/klw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/klw/vocab.txt b/models/klw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9e8b94f44d8d183552f9e23577a6c205c6b5fdef --- /dev/null +++ b/models/klw/vocab.txt @@ -0,0 +1,27 @@ +w +h +o +b +g +y +d + +' +a +s +j +f +i +- +n +e +c +m +t +r +l +u +_ +k +p +z diff --git a/models/kma/G_100000.pth b/models/kma/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..90006b601d69ca94cfee5bf20e711f2decd4f740 --- /dev/null +++ b/models/kma/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6331480931a67fe89e3750a1e8595557e01393978de9f0ecbb241f4635e17e29 +size 145479911 diff --git a/models/kma/config.json b/models/kma/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kma/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kma/vocab.txt b/models/kma/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5e9b091a0c873a4a0afa9e0ad16c2c6638fb198e --- /dev/null +++ b/models/kma/vocab.txt @@ -0,0 +1,32 @@ +| +a +ɩ +i +ŋ +n +e +d +m +b +ʊ +t +k +y +s +g +w +l +u +o +r +v +h +ɔ +p +j +c +f +z +- +' + diff --git a/models/kmd/G_100000.pth b/models/kmd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb9997b648d511620ae548ee6183a1a7f6bd1365 --- /dev/null +++ b/models/kmd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61195dbc8359509c2101f73b059b36b7942b1c0dcf246bb1e8c12ceadf999a8 +size 145483774 diff --git a/models/kmd/config.json b/models/kmd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kmd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kmd/vocab.txt b/models/kmd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..93fb4e078c69d82dd3d73f2e84872c710135c0fb --- /dev/null +++ b/models/kmd/vocab.txt @@ -0,0 +1,37 @@ +p +ꞌ +- +h +n +x +q +t + +_ +e +d +é +y +l +k +g +4 +c +s +o +z +w +0 +ʼ +m +f +1 +u +i +v +a +j +' +r +b +6 diff --git a/models/kml/G_100000.pth b/models/kml/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c1e8446db459427f544e745d16c4fb239e67be5 --- /dev/null +++ b/models/kml/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4e1415f3e90aea98d4783dd825e0bb4ae597b36546e705d878019ac7cdff7a +size 145479917 diff --git a/models/kml/config.json b/models/kml/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kml/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kml/vocab.txt b/models/kml/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8a89065a380c909304ea20b30b3713d7b4a1f826 --- /dev/null +++ b/models/kml/vocab.txt @@ -0,0 +1,32 @@ +e +_ +d +f +t +' +n +j +y +ḵ +h +r +q +o +b +w +u +- +s +g +m +p +k +z +l +ḻ +c +v +x + +a +i diff --git a/models/kmr-script_arabic/G_100000.pth b/models/kmr-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..347ac74ef9eaf343d751bc828e458805cb03a7d0 --- /dev/null +++ b/models/kmr-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98c05ed13590b77b84be01aca14b1eb9187ef6e14de78fe3511694a4a5430b35 +size 145484515 diff --git a/models/kmr-script_arabic/config.json b/models/kmr-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kmr-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kmr-script_arabic/vocab.txt b/models/kmr-script_arabic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..59ccee3770f1380b7687de3708ab2951678de4e2 --- /dev/null +++ b/models/kmr-script_arabic/vocab.txt @@ -0,0 +1,38 @@ +| +ە +ا +ی +ن +و +ێ +ر +د +ب +ك +ت +م +ه +ل +س +ئ +ۆ +ڤ +خ +گ +ژ +ز +ش +پ +چ +ج +ڕ +ف +ح +ق +ع +ڵ +غ +m +b +c + diff --git a/models/kmr-script_cyrillic/G_100000.pth b/models/kmr-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..13fb32d4936742293d4485330e9dbdac4841d478 --- /dev/null +++ b/models/kmr-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb0c18b18e06337476954fe9bc0a6f3e951d8cff6ab9f928c655a3eb1efa754 +size 145482225 diff --git a/models/kmr-script_cyrillic/config.json b/models/kmr-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kmr-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kmr-script_cyrillic/vocab.txt b/models/kmr-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cfb936f5d8fecbc2489bbef00cc460aabedf8de0 --- /dev/null +++ b/models/kmr-script_cyrillic/vocab.txt @@ -0,0 +1,35 @@ +| +ә +а +ь +н +е +р +и +д +к +б +w +' +т +м +й +у +һ +с +х +л +ӧ +в +г +ж +о +з +ш +п +ч +q +щ +ф +- + diff --git a/models/kmr-script_latin/G_100000.pth b/models/kmr-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b6c6140bdc9e6ed062c8d74b54bd1e3afe00d10 --- /dev/null +++ b/models/kmr-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ed412103bda67bc50a194bfe4f82fbf13e71753ae7d1a77b3ab2069cd2753ff +size 145482236 diff --git a/models/kmr-script_latin/config.json b/models/kmr-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kmr-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kmr-script_latin/vocab.txt b/models/kmr-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..80e9e2006bc9e95e6d191e917c4987110d571f55 --- /dev/null +++ b/models/kmr-script_latin/vocab.txt @@ -0,0 +1,35 @@ +n +h +ş +ê +e +p +c +x +w +j +d +s +ç +- +o +î +m +û +k +l +a +b +_ +z +' +u +f +v +q + +y +t +i +g +r diff --git a/models/kmu/G_100000.pth b/models/kmu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..343c6ce058a546b12db08b35646b5f0403428750 --- /dev/null +++ b/models/kmu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d593dc4cd4ed61fb4870f4932a4f8c800dc1b706f6c1583aa42b4b3b553ab6 +size 145471489 diff --git a/models/kmu/config.json b/models/kmu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kmu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kmu/vocab.txt b/models/kmu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..60c6e2a6f513efd54cd7615624ce0bdea98bd228 --- /dev/null +++ b/models/kmu/vocab.txt @@ -0,0 +1,21 @@ + +t +u +ꞌ +_ +e +l +n +h +g +a +v +i +s +o +y +p +m +k +- +f diff --git a/models/knb/G_100000.pth b/models/knb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..695d8473657a1f345c1c694a33016020798d8da8 --- /dev/null +++ b/models/knb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754b450d23db468add9ffee03efbb873476364fda50432ce030d6f454054346d +size 145487621 diff --git a/models/knb/config.json b/models/knb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/knb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/knb/vocab.txt b/models/knb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..df9b21d0ad98ca333f78ecec21aacf732c08821a --- /dev/null +++ b/models/knb/vocab.txt @@ -0,0 +1,42 @@ +g +q +ù +l +ì +o +c +t +v +b +- +_ +x +r +w +ɏ +j +e +a +6 +s +d +' +i +ʼ +f +m +k +à +2 +1 +z +p +4 +h +0 +è +n +ò +y +u + diff --git a/models/kne/G_100000.pth b/models/kne/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e4502d1b8d9ae336db023bf398863abea01e6e8 --- /dev/null +++ b/models/kne/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf0ba7be16d15e0703b23cfe0c5ff4c7d229c29bcdb2a8b25d31dcafefc9fc9 +size 145481477 diff --git a/models/kne/config.json b/models/kne/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kne/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kne/vocab.txt b/models/kne/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..61112fba3055abd8fda051e2bbac9bca2d7a8af5 --- /dev/null +++ b/models/kne/vocab.txt @@ -0,0 +1,34 @@ +u +4 +o +_ +w +b +0 +f +m +i +c +g +z +e +l +x +k +n +t +a +s +q +r +j +3 +p +y +1 +v +d + +- +' +h diff --git a/models/knf/G_100000.pth b/models/knf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..94d9bd5640d3cf473819de025dcd98f534d1962b --- /dev/null +++ b/models/knf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7072bc15cc51e0cce9553cc4f5cce07c36429d56cfb37f299c25eb5db4316c9d +size 145479917 diff --git a/models/knf/config.json b/models/knf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/knf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/knf/vocab.txt b/models/knf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3593ffd9d023f21498e3fa96132a23d1c30b2c45 --- /dev/null +++ b/models/knf/vocab.txt @@ -0,0 +1,32 @@ +k +ë +– +e +ş +u +b +' +i +j +y +o +ŋ +ñ +t +_ +l +d +ŧ +m +a +g +ţ +p +- +n + +w +h +c +r +f diff --git a/models/knj/G_100000.pth b/models/knj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e6137c0674823cf31d50171921629105887df516 --- /dev/null +++ b/models/knj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0f5adf89a33d37dbffdf43ac96dc8e4db4e86960bf193d80d4eb286943834af +size 145483783 diff --git a/models/knj/config.json b/models/knj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/knj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/knj/vocab.txt b/models/knj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ec9e75704941afa82e1af0cb72ce4d7181907b1 --- /dev/null +++ b/models/knj/vocab.txt @@ -0,0 +1,37 @@ +| +a +e +i +n +j +t +c +' +l +o +y +u +x +s +b +m +h +w +k +p +r +d +z +ẍ +q +ú +á +g +— +í +é +v +- +f +ó + diff --git a/models/knk/G_100000.pth b/models/knk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d8de04753d8f2fb0ffb293a117b14a948c17a06 --- /dev/null +++ b/models/knk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4fbb1e347a6449c13d3e624cfec775f354af5c4ccd14c0fb60e81e9beffca7f +size 145476089 diff --git a/models/knk/config.json b/models/knk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/knk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/knk/vocab.txt b/models/knk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a60ca49d88db8e0bdbe537b0e9b0a8fcd56d2dc5 --- /dev/null +++ b/models/knk/vocab.txt @@ -0,0 +1,27 @@ +| +a +n +i +ɛ +l +k +o +m +ɔ +y +u +e +b +r +w +t +s +d +g +- +f +h +p +c +' + diff --git a/models/kno/G_100000.pth b/models/kno/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ee4015c7de61d8deb6181407b8fddbc8de61cb7 --- /dev/null +++ b/models/kno/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3baaba2de3a0e3e6d9d74123bbbd8af5e682aba4bdefaecd9948f27998e137 +size 145474533 diff --git a/models/kno/config.json b/models/kno/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kno/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kno/vocab.txt b/models/kno/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cfab03b77506e170ec7f96733e62ce640a85c48c --- /dev/null +++ b/models/kno/vocab.txt @@ -0,0 +1,25 @@ +| +a +n +m +ɛ +i +ɔ +e +b +o +c +h +k +s +d +t +u +y +w +‐ +f +g +' +p + diff --git a/models/kog/G_100000.pth b/models/kog/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..160fb741039840b93f23fb0043d641dbc6f681c6 --- /dev/null +++ b/models/kog/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4829c85094c174bd5448263e7b0e3bad4282188cd19bfe96b7598a715d371616 +size 145489933 diff --git a/models/kog/config.json b/models/kog/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kog/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kog/vocab.txt b/models/kog/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..347cd9a6a6cf6d1d25296606f6af3a9ed03b44df --- /dev/null +++ b/models/kog/vocab.txt @@ -0,0 +1,45 @@ +x +' +c +_ +ʉ +l +d +í +g +a +i +ñ +p +ó +o +́ +3 +ã +y +2 +z +— +4 +0 +s +ú +n +1 +u +h +e + +6 +t +8 +j +5 +m +w +b +7 +á +é +k +9 diff --git a/models/kor/G_100000.pth b/models/kor/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5695a8d532cca2717e16a54244ee0ff206de181 --- /dev/null +++ b/models/kor/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb54347fdce4eccf107bdbc53445a053f4d0186b7ad49b5de079dccae7bb443e +size 145474425 diff --git a/models/kor/config.json b/models/kor/config.json new file mode 100644 index 0000000000000000000000000000000000000000..993d1dedb1d0c8e820b98f9e2f019ff166327038 --- /dev/null +++ b/models/kor/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.uroman", + "validation_files": "dev.uroman", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kor/vocab.txt b/models/kor/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..c7a54775f378fb2fd26d2a43d1bd850639ea221e --- /dev/null +++ b/models/kor/vocab.txt @@ -0,0 +1,25 @@ +u +_ +t +w +s +o +y +a +h +i +j +- +k +b +c +' +n +l +d +g +r + +e +m +p diff --git a/models/kpq/G_100000.pth b/models/kpq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2eabd4fc3030d36953f85a1853736a27498adb2a --- /dev/null +++ b/models/kpq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d88af8c9ee885796305e19e157cbc8435615b348049097d63ab18d0c46913ed +size 145483781 diff --git a/models/kpq/config.json b/models/kpq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kpq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kpq/vocab.txt b/models/kpq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fb5c16ea47107f435aaeb056ac29cb2485c3cd58 --- /dev/null +++ b/models/kpq/vocab.txt @@ -0,0 +1,37 @@ +- +g +o +t +f +1 +_ +z +l +– +' +s +u +m +b +i +j +3 +p +0 +7 +k +r +e +2 +w +y +d +́ +4 + +a +5 +n +h +c +9 diff --git a/models/kps/G_100000.pth b/models/kps/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d501bf4b66009e48df0c9a7809bdef17499b1e8 --- /dev/null +++ b/models/kps/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72bc1da047159c18f254e9798db501a9e104860377a216bb463ef81b8036d14 +size 145476859 diff --git a/models/kps/config.json b/models/kps/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kps/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kps/vocab.txt b/models/kps/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..301a1c4aa65af6ccbb4333ac3b8489674827506d --- /dev/null +++ b/models/kps/vocab.txt @@ -0,0 +1,28 @@ +| +a +i +o +e +n +t +k +m +w +s +y +r +l +f +h +d +u +b +g +p +- +j +– +z +c +' + diff --git a/models/kpv/G_100000.pth b/models/kpv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a450bfc777c825cf7be2b6a88e8eb9424fcc662 --- /dev/null +++ b/models/kpv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2904536f9f12748e66fb5d4bb10840d0306dc12a039d28554f9ad6b0896032c8 +size 145486086 diff --git a/models/kpv/config.json b/models/kpv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kpv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kpv/vocab.txt b/models/kpv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b5104e27669268faac9df899632a01fc48b456d --- /dev/null +++ b/models/kpv/vocab.txt @@ -0,0 +1,40 @@ +м +н +ö +б +в +ъ + +ы +я +р +_ +л +к +т +ш +– +i +с +ц +г +ё +п +ю +а +ь +о +е +д +и +і +з +ж +ф +й +- +х +у +щ +э +ч diff --git a/models/kpy/G_100000.pth b/models/kpy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..06d9d2d65c942635dfd2441d5970e9baa918d49c --- /dev/null +++ b/models/kpy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b30d395c9b8904e68845100bacb5dca7c5831f8a2a4270cc90396595f508c7c +size 145485303 diff --git a/models/kpy/config.json b/models/kpy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kpy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kpy/vocab.txt b/models/kpy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e3a9354c3150ab27176a74305072a0fb30ab9b9f --- /dev/null +++ b/models/kpy/vocab.txt @@ -0,0 +1,39 @@ +у +ц +о +х +' +ӈ +ч +в +ж +ӄ + +ю +т +– +й +а +ф +э +ш +я +д +р +е +г +и +_ +н +м +ь +л +ё +с +- +ы +з +к +ъ +б +п diff --git a/models/kpz/G_100000.pth b/models/kpz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f849984ffc578cc7bb3c3887d47c8fc3304974b4 --- /dev/null +++ b/models/kpz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7823411695b97f2f661ef7b839e24a934857a472161cc6e7d093ff697d7cc9b9 +size 145475307 diff --git a/models/kpz/config.json b/models/kpz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kpz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kpz/vocab.txt b/models/kpz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..07dd08470081dac8cccdcd0ff5a2c9d89f56b9b6 --- /dev/null +++ b/models/kpz/vocab.txt @@ -0,0 +1,26 @@ +| +o +k +e +t +y +n +i +u +a +m +p +c +w +l +r +s +ŋ +ö +' +ä +f +ë +ü +- + diff --git a/models/kqe/G_100000.pth b/models/kqe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9adcafa24e6e162b994293c12c7ec2ec5d16b07 --- /dev/null +++ b/models/kqe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b340a3cd2e85d2f9c6603d72518206ebde8721d39b1d6afb0a7028708c40a4 +size 145482211 diff --git a/models/kqe/config.json b/models/kqe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kqe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kqe/vocab.txt b/models/kqe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4e742b25c2103bde42b6a242afd7c092572c4d7 --- /dev/null +++ b/models/kqe/vocab.txt @@ -0,0 +1,35 @@ +0 +t +i +b +3 +r +1 +w +o +n +p +h +z +u +6 +- +5 +4 +y +s +a +g +_ +7 +e +l +k + +m +9 +8 +j +2 +' +d diff --git a/models/kqp/G_100000.pth b/models/kqp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..020f3d9b8220c975cb1f00cecd72786379048980 --- /dev/null +++ b/models/kqp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be3828d15944c7fb5464b064d17685ccaac92789b94797091b8f6978ef94478 +size 145480693 diff --git a/models/kqp/config.json b/models/kqp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kqp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kqp/vocab.txt b/models/kqp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..391f1cac1a86dcf8c504abe7b44620001c2f84cd --- /dev/null +++ b/models/kqp/vocab.txt @@ -0,0 +1,33 @@ +| +a +e +n +i +ô +r +b +m +k +ɲ +y +é +w +ê +l +g +o +h +t +s +u +ŋ +j +d +p +ɗ +ɓ +c +à +á +- + diff --git a/models/kqr/G_100000.pth b/models/kqr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..69074e34d289a4bc447e5ce7be7d30c9e5e139f8 --- /dev/null +++ b/models/kqr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be48ad017e2cd7e0a26a9401108c6da2ffa3c452e7cd3ab1d39977c52d615dee +size 145488367 diff --git a/models/kqr/config.json b/models/kqr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kqr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kqr/vocab.txt b/models/kqr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2f6957e9fa173fcb9bcbd8286964b640cb890608 --- /dev/null +++ b/models/kqr/vocab.txt @@ -0,0 +1,43 @@ +_ + +' +i +s +û +w +l +a +5 +- +d +4 +m +n +h +b +ô +r +î +y +e +1 +v +g +õ +k +2 +t +c +â +j +z +– +o +f +6 +8 +3 +0 +u +p +ê diff --git a/models/kqy/G_100000.pth b/models/kqy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..73834a6fee55143d62f72448801f0b90b356bbc6 --- /dev/null +++ b/models/kqy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37c5a6bba43c5c83526150728b6fe63d1fbf0e9e9825b1cc4eb5101007b50411 +size 145589765 diff --git a/models/kqy/config.json b/models/kqy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kqy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kqy/vocab.txt b/models/kqy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4eced93fb91d077c1d1e3f62dc89d5b3bdcfac0 --- /dev/null +++ b/models/kqy/vocab.txt @@ -0,0 +1,175 @@ +| +ን +ኤ +ይ +ሴ +ዬ +ኮ +ስ +አ +ህ +ድ +ዎ +ኡ +ሀ +ረ +ተ +ቶ +ሰ +ነ +ወ +ኔ +ዴ +ኬ +ማ +ሱ +ቤ +ቴ +ገ +ዶ +ት +መ +ኦ +የ +ር +ኑ +ጌ +ሥ +በ +ሶ +ከ +ደ +ሮ +ፈ +ግ +እ +ብ +ም +ለ +ሸ +ኣ +ቱ +ቄ +ቸ +ሎ +ሜ +ል +ዮ +ታ +ክ +ሽ +ሤ +ዼ +ጉ +ዝ +ባ +ሞ +ፌ +ሳ +ሩ +ሌ +ጎ +ኩ +ሬ +ዜ +ቦ +ካ +ሡ +ቾ +ሄ +ቡ +ቁ +ላ +ሁ +ሦ +ዽ +ሼ +ሙ +ዘ +ዸ +ዾ +ጽ +ፍ +ያ +ዱ +ሉ +ኢ +ቅ +ፉ +ጩ +ና +ሹ +ዉ +ዋ +ዞ +ጼ +ቆ +ጹ +ች +ሃ +ዹ +ጸ +ው +ሚ +ሾ +ፋ +ዙ +ሲ +ጭ +ጬ +ጾ +ሠ +ሆ +ጮ +ኖ +ጰ +ዳ +ጵ +ዲ +ዤ +ቼ +ቀ +ጋ +ሻ +ራ +ጄ +ዩ +ፎ +ጴ +ዠ +ዌ +ጫ +ሣ +ጨ +ቹ +ዛ +ዻ +ቃ +ዥ +ጺ +ጻ +ኒ +ጶ +ቻ +ሂ +ጳ +ዺ +ፊ +ኪ +ቢ +ዡ +ዦ +ዣ +ቂ +ሊ +ጱ +ዪ +ጅ +- +ሺ +ዢ +– +ጁ +ዚ +ጊ + diff --git a/models/krc/G_100000.pth b/models/krc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..44873b29eb6ecd557e9608374d9400b38ba114d1 --- /dev/null +++ b/models/krc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:793a404d1e9ae05fdde32f95dc3fe98a1bc0f4108c663d999610ff84368ee800 +size 145483746 diff --git a/models/krc/config.json b/models/krc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/krc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/krc/vocab.txt b/models/krc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e69ea0663ca24dfb35a30dbf030ca7ca02409522 --- /dev/null +++ b/models/krc/vocab.txt @@ -0,0 +1,37 @@ +| +а +н +л +и +е +ы +д +р +г +к +у +ъ +т +б +с +м +ю +з +й +о +х +э +п +ш +ч +ж +ё +я +– +ф +- +ь +в +' +ц + diff --git a/models/kri/G_100000.pth b/models/kri/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..35f9df14747976a994c1a07d1e61274a0c1369fe --- /dev/null +++ b/models/kri/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd16d4a7e1676969487d68c05754baa92a4b0f2501730af8a46193f3534d5f0a +size 145478361 diff --git a/models/kri/config.json b/models/kri/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kri/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kri/vocab.txt b/models/kri/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ab5fd19f4d4c86e0e68d21d8f47dc0520b056fb8 --- /dev/null +++ b/models/kri/vocab.txt @@ -0,0 +1,30 @@ +| +n +i +a +d +ɔ +ɛ +e +t +s +w +l +m +k +u +p +g +b +f +o +r +y +j +z +v +h +c +- +' + diff --git a/models/krj/G_100000.pth b/models/krj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e8cf457d0db1657d7ba96c2bf55743a199079e1 --- /dev/null +++ b/models/krj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eae064c7e7ff667066933dd0ac46b7345c512a19d95e6dfcc4fb984822bdb034 +size 145485289 diff --git a/models/krj/config.json b/models/krj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/krj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/krj/vocab.txt b/models/krj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e9b12bb29941fb47b790089eec450118c33696a6 --- /dev/null +++ b/models/krj/vocab.txt @@ -0,0 +1,39 @@ +a +| +n +g +i +k +o +u +m +s +t +d +p +r +l +y +b +h +w +e +- +j +c +' +f +z +0 +v +q +— +1 +2 +x +4 +3 +5 +7 +6 + diff --git a/models/krl/G_100000.pth b/models/krl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c3b64693f8ca27d5e5ae078d886504bd67d2437e --- /dev/null +++ b/models/krl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0259f264e7a0c6981cca90bc386b9c2505ecf5cd1ef6fbd1da7ad71acb9673 +size 145479129 diff --git a/models/krl/config.json b/models/krl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/krl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/krl/vocab.txt b/models/krl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..690407f01b502c0923c0b0831db40ed741f51fe3 --- /dev/null +++ b/models/krl/vocab.txt @@ -0,0 +1,31 @@ +j +l +f +č +- +i +o +a +u +m +e +_ + +b +p +' +y +h +g +ö +n +v +k +– +d +z +r +š +t +ä +s diff --git a/models/krr/G_100000.pth b/models/krr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b5b7dd72933b48d1c19b63bfe456a1cf5400cee4 --- /dev/null +++ b/models/krr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254a9dde2ee384ea8f7b61d1a39b8b17ad90bc9df3994533784b983309d81625 +size 145499901 diff --git a/models/krr/config.json b/models/krr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/krr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/krr/vocab.txt b/models/krr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2873f65e7534bd978db9ceb189d5d26539d7a663 --- /dev/null +++ b/models/krr/vocab.txt @@ -0,0 +1,58 @@ +| +ា +ែ +ម +រ +៉ +អ +្ +៊ +ត +ប +ន +ង +ក +ហ +់ +យ +គ +ស +ូ +ៃ +ឡ +វ +ិ +ី +ដ +ឹ +ើ +ះ +ព +ច +ឌ +ុ +ំ +ឺ +ល +ទ +ៀ +ណ +ឝ +ញ +ឆ +ឿ +ឞ +ខ +ឋ +ោ +ួ +ជ +ផ +ឃ +ឈ +ឍ +ថ +ភ +ធ +៝ + diff --git a/models/krs/G_100000.pth b/models/krs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..030cc803ec3f854987cdbf3f0c1075894fa56811 --- /dev/null +++ b/models/krs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f38277e9b0dc74274edbce7385a4173105adec3d4bf16e0e08ac59982a94c6f +size 145494493 diff --git a/models/krs/config.json b/models/krs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/krs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/krs/vocab.txt b/models/krs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b216fcf6909a37fe7d01097d34f4055748caced7 --- /dev/null +++ b/models/krs/vocab.txt @@ -0,0 +1,51 @@ +ꞌ +ö +ï +s +ü +z +ã +é +l +- +ĕ +ó +â +w +_ +ẽ +ĩ +' +j +ũ +p +r +ä +û +ŋ +h +ŭ +ë +t +b +õ +n +ă +ĭ +ô +ê +á +m +ḷ +ŏ +c +f +d +k +v +î +í +y +g + +ú diff --git a/models/kru/G_100000.pth b/models/kru/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7ce557f9dc733f4e1a52057c7a6a0fb6cf0c1b49 --- /dev/null +++ b/models/kru/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a7738b551868e4b8056bd188f9dc8c6ab932ccf52ae49ee7f084c5be2ba32f +size 145502185 diff --git a/models/kru/config.json b/models/kru/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kru/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kru/vocab.txt b/models/kru/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b6bfb401bd41facfdcc00bc0d9d9b9917474cb9 --- /dev/null +++ b/models/kru/vocab.txt @@ -0,0 +1,61 @@ +य +ॆ +े +प +' +च +ध +ै +ई +ृ +घ +़ +् +ऒ +ॊ +व +भ +झ +ओ +त +ं + +औ +ज +अ +ु +ख +ल +_ +‍ +इ +ठ +ग +- +ि +छ +ब +स +न +ू +द +म +ढ +र +थ +फ +ो +आ +ौ +ह +ड +ऎ +ी +ए +ट +ऐ +क +ा +ँ +ऊ +उ diff --git a/models/ksb/G_100000.pth b/models/ksb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d84e66064d9d177d5eb56b393c58ef806f692db9 --- /dev/null +++ b/models/ksb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6791d0d2e7755db75c75f7fb4394c5a74c39078e9fe26ea5321cedfcda9d066c +size 145479158 diff --git a/models/ksb/config.json b/models/ksb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ksb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ksb/vocab.txt b/models/ksb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b6fcc3e7ca96cc7a694c3d42aa9764d15f92039 --- /dev/null +++ b/models/ksb/vocab.txt @@ -0,0 +1,31 @@ +g +t +ṃ +i +y +k +l + +' +e +r +m +w +ṅ +c +o +j +— +d +a +_ +z +h +v +p +n +f +b +ú +s +u diff --git a/models/ksr/G_100000.pth b/models/ksr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3ceccdedecfe59cb2c0e2c945a503d7f9d78f7d8 --- /dev/null +++ b/models/ksr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69339fdd1f385710335ba2645ab48653ce47f93501c0525bbd4aa5ab642245fe +size 145486073 diff --git a/models/ksr/config.json b/models/ksr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ksr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ksr/vocab.txt b/models/ksr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d3cdbc507409b68735a156ec430ef03ca7aaaa76 --- /dev/null +++ b/models/ksr/vocab.txt @@ -0,0 +1,40 @@ +a +| +o +ŋ +i +e +m +n +k +u +g +j +l +t +w +r +s +b +q +y +d +p +h +z +- +f +0 +1 +7 +4 +2 +5 +' +6 +3 +9 +8 +c +v + diff --git a/models/kss/G_100000.pth b/models/kss/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d225fae8cfdde1f4c1378dbb32c0c7801ea1c720 --- /dev/null +++ b/models/kss/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474ca0a70611c4671e51e237bf4cdb7aac8768f8a03192b2dc350584e82d5125 +size 145484643 diff --git a/models/kss/config.json b/models/kss/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kss/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kss/vocab.txt b/models/kss/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bfc576e2e30d011c954bc7def2801665cb1fcbb0 --- /dev/null +++ b/models/kss/vocab.txt @@ -0,0 +1,38 @@ +| +̍ +a +ā +n +l +ɛ +̄ +o +ŋ +m +i +d +ɔ +ī +k +h +y +e +u +ō +w +ū +c +s +p +t +b +᷄ +ē +᷅ +f +g +v +- +j +' + diff --git a/models/ktb/G_100000.pth b/models/ktb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6aaa921c741de59b4111146bc76ed028721558e --- /dev/null +++ b/models/ktb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45d8396ca52a735c0eb7aea51fdcd0465f6826a07589c285f56e824f0df44f3 +size 145597413 diff --git a/models/ktb/config.json b/models/ktb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ktb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ktb/vocab.txt b/models/ktb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4810516df8da58b41a5a493869bbe3dc54df4570 --- /dev/null +++ b/models/ktb/vocab.txt @@ -0,0 +1,185 @@ +| +ን +አ +እ +መ +ተ +ስ +ገ +ት +ም +ዕ +ኔ +በ +ሰ +ከ +ር +ኖ +ህ +ች +ኒ +ነ +ኑ +ደ +ኩ +ዬ +ሱ +ጠ +ረ +ለ +ክ +ሁ +ኦ +ሀ +ቀ +ቶ +ል +ሃ +ዮ +ማ +ሆ +ኬ +ኤ +ሞ +ዳ +ኮ +ቹ +ና +ኣ +ሴ +ታ +ሶ +ቴ +ዎ +ሜ +ጡ +ኡ +ሩ +ሬ +ይ +ብ +ሳ +ዘ +ኢ +ቱ +ባ +ጉ +ሌ +ሮ +ዶ +ላ +ሙ +የ +ግ +ድ +ቆ +ራ +ፈ +ፉ +ጋ +ቡ +ሄ +ሲ +ሽ +ቅ +ጃ +ቤ +ቦ +ሎ +ዋ +ሪ +ካ +ሉ +ቄ +ያ +ጎ +ቲ +ቾ +ፍ +ሚ +ቃ +ጅ +ዱ +ኪ +ጥ +ዉ +ጣ +ዲ +ሸ +ሊ +ሾ +ቢ +ወ +ው +ጀ +ጨ +ቺ +ጢ +ጦ +ጌ +ጮ +ፎ +ጩ +ጭ +ጤ +ሂ +ጊ +ዜ +ጄ +ቁ +ቼ +ጵ +- +ዊ +ሻ +ቸ +ዛ +ሼ +ጳ +ዴ +ጆ +ዝ +ፌ +ጫ +ዪ +ፋ +ጰ +ጴ +ሹ +ኗ +ጬ +ቂ +ዞ +ሺ +ዩ +ዙ +ጪ +ቿ +ቻ +ፊ +ሟ +ጁ +ቧ +ዚ +ጶ +ዌ +ጂ +ቷ +ጲ +ሏ +ጧ +ሯ +ጱ +ሷ +ኘ +ሿ +ዷ +ዟ +ዦ +ኙ +ጷ +ቋ +ኳ +ጯ +ፏ + diff --git a/models/ktj/G_100000.pth b/models/ktj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1865d5b6db05219e776e06c355af26d2cb9bf40f --- /dev/null +++ b/models/ktj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228b2796baa6a4acb0aeff6fd7ff84a5c1650395039505d317a6cd5fc47b46c0 +size 145482249 diff --git a/models/ktj/config.json b/models/ktj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ktj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ktj/vocab.txt b/models/ktj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..319093e59f0e13ab6842a3f3cab9cfc388b5ff59 --- /dev/null +++ b/models/ktj/vocab.txt @@ -0,0 +1,35 @@ +k +ɛ +p +a +i +꞊ +2 +ɩ +w +h +_ +j +b +y +l +ʋ +r +o +ɔ +s +c +ŋ +' +‐ +g + +d +ԑ +f +n +0 +m +e +u +t diff --git a/models/kub/G_100000.pth b/models/kub/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b06b87c05a1f94c8b363e4838c8dd95073e3ba99 --- /dev/null +++ b/models/kub/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49b88d6eaf428a42908e28be82fe3c8c78bb632eb785fa084ead5f4d72f4547e +size 145490011 diff --git a/models/kub/config.json b/models/kub/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kub/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kub/vocab.txt b/models/kub/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6f73db8a3f7c767bc3c8544a450dc241d9c06b15 --- /dev/null +++ b/models/kub/vocab.txt @@ -0,0 +1,45 @@ +| +n +a +t +ī +i +e +k +u +w +m +s +b +ā +r +á +y +ū +ú +g +í +é +ē +d +- +f +p +ō +j +c +ó +o +h +x +l +' +z +ù +à +v +ḿ +ì +è +– + diff --git a/models/kue/G_100000.pth b/models/kue/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e53f11be9b8a77fb8017fe5f3ff5567fd91a8b1 --- /dev/null +++ b/models/kue/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29abb41b0a4438f6e9554760487e799184a55340f8f29b3c71380f48fc4aec25 +size 145483003 diff --git a/models/kue/config.json b/models/kue/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kue/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kue/vocab.txt b/models/kue/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3f0d37c9f018defd4f5a5c7f44f1e1882a730eb6 --- /dev/null +++ b/models/kue/vocab.txt @@ -0,0 +1,36 @@ +| +a +e +n +i +g +o +m +k +r +l +u +d +y +w +b +p +t +s +- +h +j +v +0 +f +1 +2 +7 +' +4 +5 +3 +6 +8 +9 + diff --git a/models/kum/G_100000.pth b/models/kum/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c68240daeb7f4f438986f0dc2d0adcd76030deb1 --- /dev/null +++ b/models/kum/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19130e20d2e22cda7d24b74fea28663ac34f183045544c8b52edd7d912c4bd66 +size 145485167 diff --git a/models/kum/config.json b/models/kum/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kum/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kum/vocab.txt b/models/kum/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e1158ddb2ca6a7b25e3858045716362fe8fb6530 --- /dev/null +++ b/models/kum/vocab.txt @@ -0,0 +1,39 @@ +| +а +н +л +е +г +и +р +ы +ъ +д +у +б +к +т +м +с +о +з +ю +ь +й +п +ш +в +ч +я +э +ё +ж +х +– +ф +0 +1 +2 +4 +ц + diff --git a/models/kus/G_100000.pth b/models/kus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c9f6e2f7b39a5f951628bbb17f50122b7db4835 --- /dev/null +++ b/models/kus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74bf22109d1f3e874e0f63882b0ec7993ed06525b28ef352a9d303bcbd3e7e96 +size 145476833 diff --git a/models/kus/config.json b/models/kus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kus/vocab.txt b/models/kus/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0a9d3a94cd9d991a772d85d77975458fd7d8149d --- /dev/null +++ b/models/kus/vocab.txt @@ -0,0 +1,28 @@ +| +a +i +n +e +u +m +o +l +k +s +b +' +y +d +t +g +p +ŋ +r +w +z +f +j +v +h +- + diff --git a/models/kvn/G_100000.pth b/models/kvn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..62cc296ca6e82306e4b35d7a1f3d76f75e087401 --- /dev/null +++ b/models/kvn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c0d6645e2a76850f84d8d486b3c31a871a257e16f591eca81b36520db029f5 +size 145490823 diff --git a/models/kvn/config.json b/models/kvn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kvn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kvn/vocab.txt b/models/kvn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3175a810788c1c34cd8dfca7fd8d31e037867368 --- /dev/null +++ b/models/kvn/vocab.txt @@ -0,0 +1,46 @@ +m +d +f +p +h +e +k +— +8 +i +b +o +a +í +c +4 +3 +' +6 +l +g +1 +y +0 +n +9 +w +t +é +q +2 +s +_ +v +r +z +- +á +ó +5 +x +j +ú +u + +7 diff --git a/models/kvw/G_100000.pth b/models/kvw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..081522efa5db4a9e66adb916cea383fb82f966bf --- /dev/null +++ b/models/kvw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f113dcd294111acc203cda518553a5c2912f848ac34d212c809dd8723cadb134 +size 145475307 diff --git a/models/kvw/config.json b/models/kvw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kvw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kvw/vocab.txt b/models/kvw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ed32def40a29abeb6e8d4ed61a87f68ac7fac741 --- /dev/null +++ b/models/kvw/vocab.txt @@ -0,0 +1,26 @@ +s +' +m +t +r +l +c +w +j +e +a +- +f +_ +o +n +y +p +d + +i +g +k +h +u +b diff --git a/models/kwd/G_100000.pth b/models/kwd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..131076bb67406fccffc021a02ad7dbf80fa662cd --- /dev/null +++ b/models/kwd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf320449612bca7a855cc9daff7b2e414aef5dccaace41a83f480bbab5fd278f +size 145478361 diff --git a/models/kwd/config.json b/models/kwd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kwd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kwd/vocab.txt b/models/kwd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..25762f5209f9bbf44852a35b0932021a4a3a6ed0 --- /dev/null +++ b/models/kwd/vocab.txt @@ -0,0 +1,30 @@ +h +0 +f +_ +w +k +l + +g +j +v +t +1 +4 +n +r +o +6 +2 +d +e +m +' +p +i +u +s +b +— +a diff --git a/models/kwf/G_100000.pth b/models/kwf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..44a063cf464817be1d9f66df782af2c86824bba6 --- /dev/null +++ b/models/kwf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f871b716f854e3ac8f8288bcf2b1a0f0818ab6db02ba47bc29dc349880cbc2d0 +size 145481469 diff --git a/models/kwf/config.json b/models/kwf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kwf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kwf/vocab.txt b/models/kwf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ba6c567fe60abb55cd18ca6d03637db89e6cacf4 --- /dev/null +++ b/models/kwf/vocab.txt @@ -0,0 +1,34 @@ +n +s +3 + +0 +1 +2 +l +4 +r +k +w +g +9 +c +h +i +b +f +a +6 +d +j +u +5 +p +t +_ +o +e +' +y +m +v diff --git a/models/kwi/G_100000.pth b/models/kwi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..823f2e03b9a56f667df5269c4b799f885854d9c4 --- /dev/null +++ b/models/kwi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:090aa17e62c64c658db4bfd2d3949f1d218aca2e9dc9dcd8cf902a1c8d10cedd +size 145488473 diff --git a/models/kwi/config.json b/models/kwi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kwi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kwi/vocab.txt b/models/kwi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..66bd2832414dc1d5a115a1c2a38dbed4f25b2c7b --- /dev/null +++ b/models/kwi/vocab.txt @@ -0,0 +1,43 @@ +a +| +n +i +u +k +s +t +p +m +z +r +w +e +ɨ +h +l +o +c +d +j +ñ +y +ú +b +í +— +é +' +g +á +ĩ +f +ó +v +ã +q +0 +x +1 +2 +4 + diff --git a/models/kxc/G_100000.pth b/models/kxc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0980ee9bd56e4c25545664589ac4f341bb14da6c --- /dev/null +++ b/models/kxc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f56f03972ffb97819546b6343337414d04728cf087e496dd27cb6189710819f +size 145565941 diff --git a/models/kxc/config.json b/models/kxc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kxc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kxc/vocab.txt b/models/kxc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2366a9107dc89aa7a59119a9d1cf73c187f837a4 --- /dev/null +++ b/models/kxc/vocab.txt @@ -0,0 +1,144 @@ +| +ን +ከ +ተ +አ +እ +መ +ፐ +ረ +ነ +ሴ +ኦ +ሰ +ይ +ቀ +ዴ +ለ +ክ +ኔ +ስ +ር +ደ +ኤ +ዬ +ቶ +ፕ +ት +ቴ +ኮ +ል +ኖ +ም +ድ +ሾ +ሀ +ሽ +ሸ +ፖ +የ +ው +ዋ +ሌ +ወ +ኬ +ኩ +ኸ +ኡ +ቁ +ሱ +ሼ +ፎ +ታ +ሎ +ሞ +ሬ +ሜ +ቆ +ሶ +ሮ +ፈ +ማ +ካ +ዳ +ና +ፒ +ዮ +ቱ +ኪ +ሄ +ኣ +ጨ +ሃ +ቄ +ላ +ኾ +ኻ +ቅ +ፔ +ዌ +ፑ +ዶ +ያ +ዲ +ሙ +ሁ +ፓ +ቃ +ዱ +ፌ +ፉ +ቻ +ቾ +ጫ +ኑ +ፋ +ጩ +ሳ +ፍ +ቸ +ኼ +ህ +ሉ +ጬ +ቲ +ኹ +ሩ +ሆ +ራ +ሚ +ች +ኜ +ኘ +ጭ +ቺ +ዎ +ኒ +ሹ +ሲ +ኝ +ኽ +ሺ +ጵ +ጰ +ሻ +ቹ +ጪ +ዩ +ሪ +ጮ +ሂ +ሊ +ኙ +ኢ +ቂ +ኛ +- +ጴ +ጳ +ኞ +ጶ +ፊ +ኺ +ዊ + diff --git a/models/kxf/G_100000.pth b/models/kxf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0a08f640b1eaf31d2f983c116c1a9fa6974ff79 --- /dev/null +++ b/models/kxf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c5d74e6c5c084f8f2cf51254d9dc657e7372db8c82c239910727d358184d1e7 +size 145484527 diff --git a/models/kxf/config.json b/models/kxf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kxf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kxf/vocab.txt b/models/kxf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8ffcbdf0574b43d9d67c8b3dc90fc27e775168e9 --- /dev/null +++ b/models/kxf/vocab.txt @@ -0,0 +1,38 @@ +| +̌ +h +a +k +̤ +t +û +ô +s +ǎ +p +î +l +ò +n +ǒ +d +è +ě +m +b +e +y +u +w +o +ǐ +ǔ +g +c +i +ṳ +r +j +̂ +' + diff --git a/models/kxm/G_100000.pth b/models/kxm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..efd559fd10852b9960865ab9f545eea77741a1dc --- /dev/null +++ b/models/kxm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99b872f5fa3455798de0dbecbfcd3f0f2b6eeae44b913b11b32d038cb27b6936 +size 145491447 diff --git a/models/kxm/config.json b/models/kxm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kxm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kxm/vocab.txt b/models/kxm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a96f6db397173c1d1821b819e8aa0facb4d4a849 --- /dev/null +++ b/models/kxm/vocab.txt @@ -0,0 +1,47 @@ +| +อ +เ +น +ฺ +ย +็ +ี +ร +ง +า +ฮ +ด +ม +ก +ิ +ป +บ +ั +ล +แ +จ +ต +ว +ซ +ค +ื +โ +ท +พ +ะ +ํ +ู +ญ +ุ +ึ +ช +ไ +ฟ +0 +1 +2 +' +` +๋ +- + diff --git a/models/kxv/G_100000.pth b/models/kxv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..25e9c6021c61b3da4a2c2410c23bdb8f359ab380 --- /dev/null +++ b/models/kxv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5462cd125cdb8359926aeb7962689ac02724ab24d8a31d0b2dccb65064d90e2f +size 145487587 diff --git a/models/kxv/config.json b/models/kxv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kxv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kxv/vocab.txt b/models/kxv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4dffbf98fa8df55ca78f34ff22d0606e1bd3043 --- /dev/null +++ b/models/kxv/vocab.txt @@ -0,0 +1,42 @@ +| +ା +ି +଼ +େ +ର +୍ +ତ +ନ +ଏ +ସ +ମ +ୁ +କ +ହ +ଁ +ଜ +ୱ +ଲ +ଇ +ଆ +ପ +ବ +ଣ +ଚ +ୀ +ଦ +ଗ +ଡ +ଟ +ୟ +ଞ +ଅ +ଙ +ୂ +ଈ +ଃ +ଂ +ଊ +ଉ +' + diff --git a/models/kyb/G_100000.pth b/models/kyb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ec1e93cebf174fedd5c36d48a55c14ca7c25f319 --- /dev/null +++ b/models/kyb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8908a34e70190fd8073c3ba9198ee42fb0ee16bc5bf7b733599132d5bd3a09 +size 145483759 diff --git a/models/kyb/config.json b/models/kyb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyb/vocab.txt b/models/kyb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e8d4ef95b1cddda0d1b1dbfbc3d5a4865dc82f94 --- /dev/null +++ b/models/kyb/vocab.txt @@ -0,0 +1,37 @@ +v +q +' +l +o +w +s +p +g +i +0 +1 +6 +d +– +j +3 +b +n +4 + +h +a +r +f +_ +x +m +z +u +t +- +— +c +k +e +y diff --git a/models/kyc/G_100000.pth b/models/kyc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2814580721434d1a135ec6e3e0fdabef1cb961a3 --- /dev/null +++ b/models/kyc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b9be05c78101acea2baf7b73694d28ef3b5c8b027c581a34a6d4470caf496c +size 145483787 diff --git a/models/kyc/config.json b/models/kyc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyc/vocab.txt b/models/kyc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..38d0d1fb4700720e650717514c40ddaf1b90f7c0 --- /dev/null +++ b/models/kyc/vocab.txt @@ -0,0 +1,37 @@ +a +| +o +n +e +i +p +k +m +y +u +l +d +r +g +s +b +w +t +j +ŋ +- +0 +' +h +4 +1 +c +2 +6 +7 +3 +v +9 +f +` + diff --git a/models/kyf/G_100000.pth b/models/kyf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4111a1fd458b0105686cc37dd679b7b7ccffa823 --- /dev/null +++ b/models/kyf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3007d48595e93b013c2f7637deee9afe2c3b7679dcfa5121d9c85888ba6427a7 +size 145479934 diff --git a/models/kyf/config.json b/models/kyf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyf/vocab.txt b/models/kyf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71abe31d5c43a8f97aa8546a2f920865a2238aa5 --- /dev/null +++ b/models/kyf/vocab.txt @@ -0,0 +1,32 @@ +s +ŋ +m +_ +j +g +h +w +e +ɔ +z +b +c +u +ʋ +ɩ +- +l +' +k +o +d +a +t +p +y +n +ɛ +f + +v +i diff --git a/models/kyg/G_100000.pth b/models/kyg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..682cc89c7997ca05f486fb881bf92024b13681b1 --- /dev/null +++ b/models/kyg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce81ce40b737ca6b5fa5f219fac8deb0fcf4d8f73876f9ee27787dfab7b4df8 +size 145470687 diff --git a/models/kyg/config.json b/models/kyg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyg/vocab.txt b/models/kyg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..974557c8d6e19b425a4be03643311fa245259d55 --- /dev/null +++ b/models/kyg/vocab.txt @@ -0,0 +1,20 @@ +a +| +e +o +' +i +n +m +g +u +y +h +l +t +p +k +f +v +s + diff --git a/models/kyo/G_100000.pth b/models/kyo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..074a9a14cd120754f0ef877bed2a5d946ec72826 --- /dev/null +++ b/models/kyo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6269b57f674c0eb7231c121526d27e4d4b8f79fcf0653de7b4ee6aef4652c86 +size 145477615 diff --git a/models/kyo/config.json b/models/kyo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyo/vocab.txt b/models/kyo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8f2aa1f41fb310af1250a70c414cfe92afcd7ec2 --- /dev/null +++ b/models/kyo/vocab.txt @@ -0,0 +1,29 @@ +| +a +n +g +i +o +e +é +h +u +l +k +m +y +d +b +t +r +s +ó +w +p +' +- +j +3 +7 +c + diff --git a/models/kyq/G_100000.pth b/models/kyq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..46a799b9a168a0c0fc2259d5ef3617639970e53a --- /dev/null +++ b/models/kyq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3829b33193d21ae4cca147049cdf8b86f9c22fb6381f9b1874f833ddbece4e9 +size 145479949 diff --git a/models/kyq/config.json b/models/kyq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyq/vocab.txt b/models/kyq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4061bf0eb72bfa4e190d6dc3b9816fd0ec6d47c --- /dev/null +++ b/models/kyq/vocab.txt @@ -0,0 +1,32 @@ +| +a +e +n +k +ɔ +i +t +s +m +ɛ +g +o +u +ɗ +r +j +̰ +l +d +b +' +ŋ +y +ɓ +p +c +‐ +z +w +ƴ + diff --git a/models/kyu/G_100000.pth b/models/kyu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2bee73b772184b5d23fa3558d3666c6d01c4eb10 --- /dev/null +++ b/models/kyu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83acd92dd2a50d658c8dbcbb8236cf5d3146760a843928d31410355afbf7ed2a +size 145485395 diff --git a/models/kyu/config.json b/models/kyu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyu/vocab.txt b/models/kyu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cbffec343a3c5ab19a63c66c7426ce14ec7dbb7f --- /dev/null +++ b/models/kyu/vocab.txt @@ -0,0 +1,39 @@ +ꤢ +| +꤬ +ꤟ +꤭ +ꤩ +ꤛ +ꤧ +ꤣ +ꤒ +ꤔ +ꤪ +ꤚ +ꤤ +ꤨ +ꤕ +ꤊ +ꤥ +ꤜ +ꤗ +ꤌ +ꤞ +ꤘ +ꤡ +ꤋ +ꤦ +ꤙ +ꤓ +ꤖ +ꤏ +ꤝ +ꤠ +ꤑ +ꤐ +ꤍ +ꤎ +m +' + diff --git a/models/kyz/G_100000.pth b/models/kyz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..59b70f97cd099ac31e9b06e310f673960a27868f --- /dev/null +++ b/models/kyz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c395b0315a9efb055a36df54d6d2378d6a8b5072ab2e3f7b44fb01efcac0592 +size 145484519 diff --git a/models/kyz/config.json b/models/kyz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kyz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kyz/vocab.txt b/models/kyz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7fa094fece1d390533b3a5e194b9e167fb0e903d --- /dev/null +++ b/models/kyz/vocab.txt @@ -0,0 +1,38 @@ +g +ẽ +e +4 +o +z +_ +v +ã +6 +a +r +k +— +m +c +ĩ +ũ +õ +w +s +l + +u +b +t +h +n +x +i +f +ỹ +0 +d +' +y +j +p diff --git a/models/kzf/G_100000.pth b/models/kzf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..30c7b69b6db9d0298e25c079c46942497645a1ca --- /dev/null +++ b/models/kzf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb18fb73db7ecc4d2d35028009361f98d0f48bc42ccda91bbabe9585cec5fbed +size 145476085 diff --git a/models/kzf/config.json b/models/kzf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/kzf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/kzf/vocab.txt b/models/kzf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2e8dac141eea50f7a0099152878117fd3ac0924c --- /dev/null +++ b/models/kzf/vocab.txt @@ -0,0 +1,27 @@ +l +t +j +e +d +z +c +k +m +a +i +s +h +g +o +' +n +- +b +u +p +r +_ +y +f + +w diff --git a/models/lac/G_100000.pth b/models/lac/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3884a2af141205ff0f1ee3f2b754a9f97494a08d --- /dev/null +++ b/models/lac/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae84bd1474e0a06013a8c03f53bbbaf8f044451eb93eaee064d67d9eb696055 +size 145483770 diff --git a/models/lac/config.json b/models/lac/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lac/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lac/vocab.txt b/models/lac/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..651242559e27b0d39dfaed0737f09505477a443f --- /dev/null +++ b/models/lac/vocab.txt @@ -0,0 +1,37 @@ +- +a +ó +r +ú +x +l +c +o +s +n +j +e +m +p +u +k + +í +g +_ +d +t +f +ñ +i +b +w +q +h +' +ʌ +é +z +y +á +v diff --git a/models/laj/G_100000.pth b/models/laj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..552709ec9ef17109ad50e9e13a88a42a5f263275 --- /dev/null +++ b/models/laj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9653e82156e50964a9bcacbb90b1bae0b5f173d7fde69b3c6c2ad5a9cce921a1 +size 145476857 diff --git a/models/laj/config.json b/models/laj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/laj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/laj/vocab.txt b/models/laj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..96df84e3ec0d84dc9d931c148b317e3d5b830fcc --- /dev/null +++ b/models/laj/vocab.txt @@ -0,0 +1,28 @@ +| +o +a +e +i +n +k +m +t +r +w +u +y +d +c +b +ŋ +l +p +g +j +- +' +f +v +s +h + diff --git a/models/lam/G_100000.pth b/models/lam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..53bf7c95d22b2be204a223f891f4d31523cb2ac4 --- /dev/null +++ b/models/lam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ac944cc0d0ab46230a4fd4b0099a8bd8ae2925b6aea4845fb700adb4b15ae0 +size 145477603 diff --git a/models/lam/config.json b/models/lam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lam/vocab.txt b/models/lam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d372d74402eafbf649c65f48a672d9a3cc4e7767 --- /dev/null +++ b/models/lam/vocab.txt @@ -0,0 +1,29 @@ +a +| +i +u +e +l +k +n +m +ŵ +o +s +w +t +y +p +f +b +c +ŋ +d +g +' +j +- +— +v +ʼ + diff --git a/models/lao/G_100000.pth b/models/lao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e69dddae02c34713f83127a4839034dcdc2b50e4 --- /dev/null +++ b/models/lao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea127de6e7394448203f6bee8f5ae2ed124e66a76a0b5410e8a4c698b2c09af4 +size 145494405 diff --git a/models/lao/config.json b/models/lao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lao/vocab.txt b/models/lao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9057504f0832f120873142888cd473bbc46cbcdd --- /dev/null +++ b/models/lao/vocab.txt @@ -0,0 +1,51 @@ +ດ +ແ +ຝ +ໃ +ຍ +ຈ +- +ູ +ສ +ະ +ຣ +ບ +ຄ +ນ +ປ +ຜ +ຸ +' +້ +ົ +ິ +ຼ +ງ +ຂ +ີ + +_ +ຖ +າ +ັ +ທ +ໂ +ພ +ຫ +່ +ຟ +ື +ໍ +ວ +ຽ +ຶ +ຮ +ຢ +ເ +ລ +ຕ +ກ +ອ +ມ +ໄ +ຊ diff --git a/models/las/G_100000.pth b/models/las/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4bbf07579938342d978b7fe13e96723a22868741 --- /dev/null +++ b/models/las/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:997dde668fe03994cd487a4d9568921c92222854492887071957d6e52386abe2 +size 145479947 diff --git a/models/las/config.json b/models/las/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/las/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/las/vocab.txt b/models/las/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8246912760de17cf0118186bc9468ab23f725506 --- /dev/null +++ b/models/las/vocab.txt @@ -0,0 +1,32 @@ +| +a +e +n +t +s +ɛ +r +ɩ +i +k +ʋ +w +m +l +ɔ +ꞌ +- +p +y +o +u +ñ +c +ɖ +h +f +ŋ +' +đ +d + diff --git a/models/lat/G_100000.pth b/models/lat/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2710ff8ae470a083e2f833e9260b4fffdbe64b0 --- /dev/null +++ b/models/lat/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c63d33adabfcedc49d8f1c6bc008b00c7cfeadefe4b604ba597b4f196d641fc7 +size 145477625 diff --git a/models/lat/config.json b/models/lat/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lat/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lat/vocab.txt b/models/lat/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2bb75a90b81ef8eedfccc63c8badc40fd5773955 --- /dev/null +++ b/models/lat/vocab.txt @@ -0,0 +1,29 @@ +l +p +b +t +f +z +r +s +í + +_ +— +v +- +ì +c +d +h +i +g +x +n +q +a +m +o +y +u +e diff --git a/models/lav/G_100000.pth b/models/lav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ae7cb1c794dde7fbbb9993f960f4748bc6fff45f --- /dev/null +++ b/models/lav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befdd2e310a7ea00e344341f16f70b9524a4a6cd12f2f135835f5ad6b3d9a999 +size 145483883 diff --git a/models/lav/config.json b/models/lav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lav/vocab.txt b/models/lav/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..982e3d903433313e12ff9841b223ad76dbe0f6d0 --- /dev/null +++ b/models/lav/vocab.txt @@ -0,0 +1,37 @@ +u +a +v +ķ +b +d +p +o +z +ģ +č +š +h +i +ā +g +e +_ +ī + +– +c +s +f +l +ē +r +n +m +t +ū +k +- +ļ +ņ +ž +j diff --git a/models/law/G_100000.pth b/models/law/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3843052a48ef7bf36ef816a7b56e0e662eca4906 --- /dev/null +++ b/models/law/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f65dc242403d8a78646fa7bc787441e368c43e555d9a6ce89f051678fbaaa95 +size 145478483 diff --git a/models/law/config.json b/models/law/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/law/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/law/vocab.txt b/models/law/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..89498d9f4eb524f1354ac501ddcc1d9eb5e1ec2c --- /dev/null +++ b/models/law/vocab.txt @@ -0,0 +1,30 @@ +u +d +w +m +h +_ +l +' +b +r +s +o + +5 +e +` +j +y +k +g +t +9 +p +c +v +1 +n +a +8 +i diff --git a/models/lbj/G_100000.pth b/models/lbj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbff0ac7e3f0c8eaf0284020d3f662dd032f652d --- /dev/null +++ b/models/lbj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e324b479f985324a7f4e38422bd2f2de2a07799f3758f870332eed7fd6db3f6e +size 145499101 diff --git a/models/lbj/config.json b/models/lbj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lbj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lbj/vocab.txt b/models/lbj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..42ab17ed3189000ceeb9ac4f60695866ff9dafac --- /dev/null +++ b/models/lbj/vocab.txt @@ -0,0 +1,57 @@ +་ +| +ས +ན +ི +ོ +ང +ད +ག +ེ +མ +ུ +བ +འ +ལ +ར +ཡ +ཁ +ཀ +ྱ +པ +ཏ +ཤ +ཞ +ཅ +ྲ +ཟ +ཆ +ཚ +ཕ +ླ +ྐ +ཱ +ྟ +ཉ +ཛ +ྒ +ཐ +ཧ +ཙ +ཨ +ྡ +ྔ +ཇ +ྤ +ྩ +ྙ +ྕ +ྨ +ྫ +ྣ +ྗ +ྭ +ྦ +ྷ +ཝ + diff --git a/models/lbw/G_100000.pth b/models/lbw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd66ac851fbd72165dcff6233fa10510d396273f --- /dev/null +++ b/models/lbw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24fb826c1ee7b49c24041edd55b3e6f3c381aa52753495801634cff9764695f1 +size 145474567 diff --git a/models/lbw/config.json b/models/lbw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lbw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lbw/vocab.txt b/models/lbw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77d074b031eaed161319f5a295270e10768bd472 --- /dev/null +++ b/models/lbw/vocab.txt @@ -0,0 +1,25 @@ +s +h +' +b +m +w +a +n +- +y +i +k +d +l +e + +_ +o +r +p +g +t +j +u +c diff --git a/models/lcp/G_100000.pth b/models/lcp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..672953066035783fb7471756dc2bf3101731f855 --- /dev/null +++ b/models/lcp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f6e3169dec3255262e6ae845b11d59b7b4c3806a822a417a5cf1a42decfb396 +size 145487605 diff --git a/models/lcp/config.json b/models/lcp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lcp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lcp/vocab.txt b/models/lcp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..07fce5dbb9325c629721b8e76bb60b0f17029fc2 --- /dev/null +++ b/models/lcp/vocab.txt @@ -0,0 +1,42 @@ +ค +ฌ +ด +ะ +_ +ซ +จ +แ +ฮ +ั +ร +k +พ +ิ +โ +ฆ +ช + +ย +า +ี +ต +ึ +ท +ว +่ +ื +ญ +ม +ป +ก +ุ +' +เ +ง +ู +น +อ +ฟ +ไ +ล +บ diff --git a/models/lee/G_100000.pth b/models/lee/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c36f56276b968aaed47c29a6958713e30097d4e2 --- /dev/null +++ b/models/lee/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc55ee40231faa7a9206b8efa2f0ca7c7849274d8cb629fe9c7df92f37ecbfa +size 145504511 diff --git a/models/lee/config.json b/models/lee/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lee/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lee/vocab.txt b/models/lee/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4429b34cd635330685320b946f1dab9c815ee023 --- /dev/null +++ b/models/lee/vocab.txt @@ -0,0 +1,64 @@ +| +ə +n +́ +̀ +y +ɛ +b +w +m +l +ɔ +r +a +d +i +á +z +à +s +h +k +c +t +' +í +ì +g +é +ń +ǹ +̃ +e +ó +ù +u +- +j +p +ò +ú +̌ +è +ẽ +ǎ +ê +ã +ǐ +v +â +ĩ +ŋ +f +o +õ +ǔ +ě +ũ +ô +ǒ +ṹ +ṍ +̂ + diff --git a/models/lef/G_100000.pth b/models/lef/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a6695ade22cdffc5bd8ba7c8593b1b47ea27b17 --- /dev/null +++ b/models/lef/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429edda6883fa615758cec70dfa8225d18790c5adbc0a5e6cbadfe54bce6cc99 +size 145483759 diff --git a/models/lef/config.json b/models/lef/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lef/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lef/vocab.txt b/models/lef/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..353b8e233655fffedacd4952924448d4de3cb3e1 --- /dev/null +++ b/models/lef/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +n +ɔ +u +k +b +l +e +m +ɛ +t +d +y +s +o +w +- +ƒ +́ +p +g +á +ú +é +r +v +í +' +h +ó +ń +ḿ +6 +2 + diff --git a/models/lem/G_100000.pth b/models/lem/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f73232acc6e1b8e215cfadf5119c8d451815274 --- /dev/null +++ b/models/lem/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dba0fabda918849136e85a6e6163d6123f347ed0bcc9a175ea729ea4526d941 +size 145481471 diff --git a/models/lem/config.json b/models/lem/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lem/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lem/vocab.txt b/models/lem/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a681822d41a18c680fbdbca69cea54c3dcf704f6 --- /dev/null +++ b/models/lem/vocab.txt @@ -0,0 +1,34 @@ +| +a +n +ɔ +́ +b +á +ɛ +e +m +y +i +í +k +é +t +ŋ +u +l +s +ú +c +h +o +w +ó +d +f +j +g +ʼ +- +' + diff --git a/models/lew/G_100000.pth b/models/lew/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ee18700d1c4bcf4a406d54ce727d73b0cc2ea56e --- /dev/null +++ b/models/lew/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66846d7d3f514e84971fd5fa00cc312b718f1688ad7ada269201a21a09217ba6 +size 145481472 diff --git a/models/lew/config.json b/models/lew/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lew/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lew/vocab.txt b/models/lew/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9e603a83e15b5d77e4e40e695eea428ef1f8337e --- /dev/null +++ b/models/lew/vocab.txt @@ -0,0 +1,34 @@ +a +| +n +i +u +o +m +t +e +p +r +s +k +l +b +g +d +y +v +h +- +j +f +˻ +˼ +z +c +w +' +1 +— +9 +2 + diff --git a/models/lex/G_100000.pth b/models/lex/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d96780cbf3eb9fa48542b6ad82aa84bd909a28d --- /dev/null +++ b/models/lex/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:711ebfa759eb9b78b412c043d2d71c500ce279c5cf95aa4d678f512d41c0345c +size 145476099 diff --git a/models/lex/config.json b/models/lex/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lex/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lex/vocab.txt b/models/lex/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b4377f634abc4bc1e27ed1724117cc766e13931e --- /dev/null +++ b/models/lex/vocab.txt @@ -0,0 +1,27 @@ +_ +l +e +i +a +o +c +t +w +b +m +k +j +p +s +n +h +u +' +z +r +f +g + +y +- +d diff --git a/models/lgg/G_100000.pth b/models/lgg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26c56dbc907c74b1db9ea9c3b9d7e35d2e53359e --- /dev/null +++ b/models/lgg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a57a4d131b794d39f2f7c3045f347f7aed56ff253a90cc2658e84548551d78e +size 145481470 diff --git a/models/lgg/config.json b/models/lgg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lgg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lgg/vocab.txt b/models/lgg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..58e0a8e0af27b23e81e55b085132a5b902f25c97 --- /dev/null +++ b/models/lgg/vocab.txt @@ -0,0 +1,34 @@ +| +i +a +e +r +u +n +o +m +d +y +l +k +b +z +t +s +p +g +' +v +f +c +j +w +h +­ +- +0 +1 +5 +3 +q + diff --git a/models/lgl/G_100000.pth b/models/lgl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..10551a346be1f66c07538f07a0f710fba6b27b58 --- /dev/null +++ b/models/lgl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef906630d8f0a00eaa19217e05fadb6fa2b5d094a6e3c0d714a53307e0c966ea +size 145481347 diff --git a/models/lgl/config.json b/models/lgl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lgl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lgl/vocab.txt b/models/lgl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2a588f2016b81465d9d5f6d65c7ee74947563e6c --- /dev/null +++ b/models/lgl/vocab.txt @@ -0,0 +1,34 @@ +2 +d +w +u +e +i +n +g +a +c +5 +_ +m +k +1 + +f +y +l +3 +p +s +r +o +0 +h +b +- +v +4 +j +' +ꞌ +t diff --git a/models/lhu/G_100000.pth b/models/lhu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c66a9560a54058eec46d0d9979cb4876d04cdb4 --- /dev/null +++ b/models/lhu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b88dc93debbe683bbd9a053e3fd5dd7b1e1cd645f1383b76a53950e2034515 +size 145479925 diff --git a/models/lhu/config.json b/models/lhu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lhu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lhu/vocab.txt b/models/lhu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0a5ccb9b5816435d1dc0195d5f53c00ebe0bf0c7 --- /dev/null +++ b/models/lhu/vocab.txt @@ -0,0 +1,32 @@ +| +a +h +e +ˬ +w +t +u +i +v +o +k +' +l +y +n +ˍ +p +c +g +m +ˆ +s +ˉ +d +b +- +j +r +f +z + diff --git a/models/lia/G_100000.pth b/models/lia/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b51bb95f34f1cf981151df5e37d9758b0bf8aca --- /dev/null +++ b/models/lia/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098b6a2dad4742e9b31d15e9112fde8706b657c889388de1b75f055bc2f43089 +size 145476837 diff --git a/models/lia/config.json b/models/lia/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lia/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lia/vocab.txt b/models/lia/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..37a703c4efb2844520bb0373161e978946b216ce --- /dev/null +++ b/models/lia/vocab.txt @@ -0,0 +1,28 @@ +| +a +i +n +ŋ +k +b +ɛ +e +y +m +t +ɔ +h +u +o +d +w +s +l +r +p +g +f +- +— +' + diff --git a/models/lid/G_100000.pth b/models/lid/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d8f4a3ba396a2bb88ad09b79f4df62a81a1232e --- /dev/null +++ b/models/lid/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23b5b1927a6a298c0294cd7a5ccf321315f0543c4e561f73202083771ceddf29 +size 145482245 diff --git a/models/lid/config.json b/models/lid/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lid/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lid/vocab.txt b/models/lid/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1bde0e3062d30e1ff800583168a26db116581a36 --- /dev/null +++ b/models/lid/vocab.txt @@ -0,0 +1,35 @@ +y +1 +d +r +- +n +s +m +t +g +4 +b +9 +e +w +6 +_ + +5 +j +i +a +k +0 +3 +p +o +8 +' +7 +u +2 +h +– +l diff --git a/models/lif/G_100000.pth b/models/lif/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7236d7d3b551af71019b39bce20d33e805f8a4b4 --- /dev/null +++ b/models/lif/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041b3b2ef75c4d9a79e536d71c2bd407225cb55924fcf313b9012366683541d0 +size 145493729 diff --git a/models/lif/config.json b/models/lif/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lif/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lif/vocab.txt b/models/lif/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d15f7a951f272a3995f40dc0fa1dbae64081c079 --- /dev/null +++ b/models/lif/vocab.txt @@ -0,0 +1,50 @@ +् +| +ा +े +़ +‍ +क +न +म +ङ +ि +ु +ल +ॽ +स +ः +ो +र +त +ब +प +ह +य +ख +आ +ग +व +द +इ +फ +थ +च +ज +ओ +छ +भ +ध +ए +घ +अ +उ +– +- +ै +' +ौ +­ +६ +— + diff --git a/models/lip/G_100000.pth b/models/lip/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bdad62ee8497daf06ac339b0f89df80b58298e04 --- /dev/null +++ b/models/lip/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dec0603e2510015633293c3ec19702c1ce330f0aae31c94f095939445fff77c +size 145489140 diff --git a/models/lip/config.json b/models/lip/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lip/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lip/vocab.txt b/models/lip/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f3b4561b05bf065cc50bae6a2a290bada1fba50f --- /dev/null +++ b/models/lip/vocab.txt @@ -0,0 +1,44 @@ +| +ǝ +a +i +n +e +k +o +b +s +t +u +y +l +m +f +ɛ +ɔ +d +w +p +á +é +í +ó +ú +r +́ +ń +h +ã +g +v +̃ +- +ũ +z +ĩ +ḿ +à +ẽ +' +̀ + diff --git a/models/lis/G_100000.pth b/models/lis/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ccec1ee29a9da05ce22312d4199e85d0cb72bf2 --- /dev/null +++ b/models/lis/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd129a9744229dabfc5fa776a4168cb2912eff7c97cf740a245fbdb38bfd3ff6 +size 145497581 diff --git a/models/lis/config.json b/models/lis/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lis/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lis/vocab.txt b/models/lis/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a7f70ab9f53788e87d0cf0508a67e8703773b182 --- /dev/null +++ b/models/lis/vocab.txt @@ -0,0 +1,55 @@ +ꓤ +ꓖ +ꓡ +ꓺ +ꓣ +ꓶ +ꓢ +s +ꓥ +x +ꓙ +ꓕ +ꓛ +ꓫ +ꓳ +ꓪ +ꓵ +ꓱ +ˍ +ꓑ +ꓘ +ꓔ +ꓦ +ꓻ +ꓗ +ꓧ +ꓯ +ꓟ +ꓩ + +ꓲ +ꓹ +b +ꓐ +' +g +ꓒ +- +ꓝ +ꓼ +ꓬ +ꓽ +ꓷ +ꓴ +_ +ꓰ +ꓓ +ꓜ +ꓮ +ꓠ +w +ꓸ +ꓚ +ꓞ +z diff --git a/models/lje/G_100000.pth b/models/lje/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a11d91ead14654390bebc520e82b9ce7b2d2d4a --- /dev/null +++ b/models/lje/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:694b053d783d0e1d12215659cdcd8093c03032c0a7d59e664635a0122059f0f6 +size 145482241 diff --git a/models/lje/config.json b/models/lje/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lje/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lje/vocab.txt b/models/lje/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8e640afa85d0c0241fb76e118b7dd3480645826a --- /dev/null +++ b/models/lje/vocab.txt @@ -0,0 +1,35 @@ +b +j +t +g +5 +w +e +c +o +_ +7 +3 +n +p +h +u +4 +i +ꞌ +r +1 +d +l +2 +a +- +y +s +f +6 +k + +m +' +0 diff --git a/models/ljp/G_100000.pth b/models/ljp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..add740876b5f60a565ae6b501e0733aec2029335 --- /dev/null +++ b/models/ljp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c42039585adfe76693bb222d14c03ce84fd3bffa46ccdedbf810ecb94601685 +size 145482195 diff --git a/models/ljp/config.json b/models/ljp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ljp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ljp/vocab.txt b/models/ljp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbc9f501ba401042cd9c2addc1b1804fc55a4c38 --- /dev/null +++ b/models/ljp/vocab.txt @@ -0,0 +1,35 @@ +a +| +i +n +k +u +e +m +l +s +t +r +g +d +h +j +o +b +p +y +- +w +c +f +z +' +0 +v +4 +6 +1 +2 +3 +ë + diff --git a/models/llg/G_100000.pth b/models/llg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a1f200d485b8a2eab43c5a61a0c7ea098257f1f --- /dev/null +++ b/models/llg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d503cd6926c903a46b4f7bbcba64894a7d009c3a08e27c263b0a907d58887377 +size 145475291 diff --git a/models/llg/config.json b/models/llg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/llg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/llg/vocab.txt b/models/llg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..04f2b870161d6be2f1045de395a53d190bb1677f --- /dev/null +++ b/models/llg/vocab.txt @@ -0,0 +1,26 @@ +l +- +b +i +g +n +a +_ +d +s +t + +o +c +k +y +m +f +e +' +x +r +h +p +w +u diff --git a/models/lln/G_100000.pth b/models/lln/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..532062c9220d6551fbb23e0d7455ffd187407f67 --- /dev/null +++ b/models/lln/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4dc5f7e61631f949c529dd1890a71291600674014198490ff139ccb77732cbb +size 145482975 diff --git a/models/lln/config.json b/models/lln/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lln/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lln/vocab.txt b/models/lln/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1578868b9a1a30652347f7ef644acc7686087311 --- /dev/null +++ b/models/lln/vocab.txt @@ -0,0 +1,36 @@ +ỹ +' +l +p +r +s +i +g +h +u +á +í +ɓ +ú +- +j +w +̃ +t +m +b +ó +c +k + +_ +a +é +y +e +d +ɗ +ŋ +n +o +` diff --git a/models/lme/G_100000.pth b/models/lme/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8990bacb9378f528c5dc81e3334a49e82fe7a9d6 --- /dev/null +++ b/models/lme/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e462ca2370a1665ceaf36b11a4884c137acf874945693a470fac379cd929cd30 +size 145483865 diff --git a/models/lme/config.json b/models/lme/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lme/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lme/vocab.txt b/models/lme/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2777b682acd76a166ae4b57d6104a2d2a90ad2c7 --- /dev/null +++ b/models/lme/vocab.txt @@ -0,0 +1,37 @@ +| +a +ə +n +m +i +u +k +w +r +d +y +s +e +' +b +o +t +z +h +ɓ +g +c +f +â +ŋ +l +v +ɗ +ô +j +p +î +û +ê +- + diff --git a/models/lnd/G_100000.pth b/models/lnd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..087d9aab44874e6373d7bcc131198202d672aa8a --- /dev/null +++ b/models/lnd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b98678510d6ef86619654e22cd17d7858544a7b39e53b545c913125c7305bc26 +size 145482973 diff --git a/models/lnd/config.json b/models/lnd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lnd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lnd/vocab.txt b/models/lnd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24eca94aeebbf949ca1c4d80d4a510b9b3d3e3fd --- /dev/null +++ b/models/lnd/vocab.txt @@ -0,0 +1,36 @@ +- +4 +r +k +v +c +e +t +b +2 +y +p +i +u +_ +' +1 +w +j +s +h +d + +5 +m +— +8 +é +0 +n +a +6 +7 +g +o +l diff --git a/models/lns/G_100000.pth b/models/lns/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b477fce6b0d7a30b185513cc66266862c20b7d8 --- /dev/null +++ b/models/lns/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0a9aa0d3f7fd4b4b71c3e13b144918c66d4c1b85dd7249c29dc0a61695078fe +size 145492105 diff --git a/models/lns/config.json b/models/lns/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lns/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lns/vocab.txt b/models/lns/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..555e3ec03058a01d84ad036dd526e9dc3885b9b8 --- /dev/null +++ b/models/lns/vocab.txt @@ -0,0 +1,48 @@ +ó +u +y +ə +ǒ +k +b +ǔ +s +e +ŋ +l +̀ +ù +ú +n +c +ʼ +a +o +- +ǎ +ò +ì +z +m +̌ + +d +é +r +i +h +á +v +t +_ +í +è +g +à +ǐ +j +w +ě +́ +f +p diff --git a/models/lob/G_100000.pth b/models/lob/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e91ecd069ff6249d97fdf80ba3be7e6f5482b43b --- /dev/null +++ b/models/lob/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0824409960856f5fe3271cdcf9c7373033d1f279c5e17ef209c685fa4f94e97 +size 145487581 diff --git a/models/lob/config.json b/models/lob/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lob/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lob/vocab.txt b/models/lob/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..58b0d84f3e6bfb3460a70f9c5dac3b81dd0e96ae --- /dev/null +++ b/models/lob/vocab.txt @@ -0,0 +1,42 @@ +| +a +ɩ +n +ɛ +r +ɔ +k +h +d +t +b +g +ʋ +l +o +i +s +e +̃ +p +m +ι +u +w +j +f +ã +- +y +c +' +ə +ƴ +ũ +ẽ +õ +ɓ +ĩ +v +ǝ + diff --git a/models/lok/G_100000.pth b/models/lok/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..11ac5170ec18e23ce3cfffc2e8860211b0b3b75d --- /dev/null +++ b/models/lok/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad385eef25c42aa991f035cce670fb86422c520a7b789fe70a3d943b2c9763c9 +size 145477597 diff --git a/models/lok/config.json b/models/lok/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lok/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lok/vocab.txt b/models/lok/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3aa0db5682f85b15f94eb9ec5dc368e8b1c77891 --- /dev/null +++ b/models/lok/vocab.txt @@ -0,0 +1,29 @@ +w +e +a +ɔ +l +s +b +d +j +f +o +m +y +i +t +c +_ +n +k +' +g + +h +u +` +p +- +ɛ +q diff --git a/models/lom/G_100000.pth b/models/lom/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..60bc1279aa4712335cf7731d47749c990d3cb426 --- /dev/null +++ b/models/lom/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75580d8e360985b0491e07ad99999ccb1af6d73982698fbd701b825664dbacc7 +size 145499141 diff --git a/models/lom/config.json b/models/lom/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lom/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lom/vocab.txt b/models/lom/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..489e5fb44ed53b00acee48f87987edd50d5ba823 --- /dev/null +++ b/models/lom/vocab.txt @@ -0,0 +1,57 @@ +| +a +i +ɛ +l +e +n +u +g +t +ɔ +o +w +z +ɣ +m +v +k +‐ +é +s +d +y +p +ƃ +ʋ +b +f +ŋ +è +á +à +̀ +́ +ũ +̃ +õ +í +ã +ì +j +ú +h +ù +ẽ +ĩ +ó +ò +ẁ +— +' +ẃ +ɓ +c +ê +- + diff --git a/models/lon/G_100000.pth b/models/lon/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..59aa396f5fe737c4de9a492a21501524e98c52dd --- /dev/null +++ b/models/lon/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:699fee40d54d6fb37eb0515db901856a88da1f8aca5b0f339fd9d24a0ebe26ac +size 145483765 diff --git a/models/lon/config.json b/models/lon/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lon/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lon/vocab.txt b/models/lon/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..61d03572d0b9d0037f9e5b42f1d658c6b7b349d7 --- /dev/null +++ b/models/lon/vocab.txt @@ -0,0 +1,37 @@ +2 +o +w +8 +0 +7 +s +' +6 +y +v +b +_ +4 +c +n +r +u +i +3 +9 +z +h +g +f +j +l + +a +m +e +d +p +1 +5 +t +k diff --git a/models/loq/G_100000.pth b/models/loq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1220f67f7151f4ba934d0545bd1949d95b8284a2 --- /dev/null +++ b/models/loq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b5e6830e2bd540f0e340922e316a88ea4f8173f1ccc8da4e28b9790b1b4cafb +size 145482224 diff --git a/models/loq/config.json b/models/loq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/loq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/loq/vocab.txt b/models/loq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..948add0ae54f3bd4fd5289d5c2e613224da81445 --- /dev/null +++ b/models/loq/vocab.txt @@ -0,0 +1,35 @@ +v +f +w + +ɔ +i +ŋ +a +́ +g +b +̣ +e +p +' +_ +t +̀ +k +d +h +é +r +y +ɛ +n +m +s +z +- +l +u +o +ɓ +ó diff --git a/models/lsi/G_100000.pth b/models/lsi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26e098f6e0a30966d566fa5b23dd76f3d6607780 --- /dev/null +++ b/models/lsi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04878221a306a6a47726bd9326cc5af1321c5383024b1a02c8979601da91db40 +size 145485415 diff --git a/models/lsi/config.json b/models/lsi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lsi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lsi/vocab.txt b/models/lsi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6cd859d4a231a243aabd3fffb83b0a4c29175b28 --- /dev/null +++ b/models/lsi/vocab.txt @@ -0,0 +1,39 @@ +l +7 +f +- +q +p + +y +h +e +0 +6 +k +3 +x +a +n +5 +i +w +1 +u +j +_ +2 +g +o +' +z +c +s +` +b +d +4 +v +t +m +r diff --git a/models/lsm/G_100000.pth b/models/lsm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f2fe3c516b56f8d213d86cdfc9adff5bb0aa948f --- /dev/null +++ b/models/lsm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5685125685b30d1b882f8738e5b7dc8b4b7b58723d2e536332e838ffb5201ea8 +size 145480700 diff --git a/models/lsm/config.json b/models/lsm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lsm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lsm/vocab.txt b/models/lsm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f78020670ee589c8450c47bed1ad4c3bd9bdf4e8 --- /dev/null +++ b/models/lsm/vocab.txt @@ -0,0 +1,33 @@ +a +| +i +e +o +n +u +b +h +l +m +y +s +w +r +d +k +t +g +c +' +f +j +- +p +0 +1 +2 +4 +5 +3 +q + diff --git a/models/luc/G_100000.pth b/models/luc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c6260083000bb49c8ba983237a8173035ab5f85 --- /dev/null +++ b/models/luc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a3bdb605b28a32a1185aa0f31a093711a4439b48f98bfc0e7c5a21733bffd4 +size 145500647 diff --git a/models/luc/config.json b/models/luc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/luc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/luc/vocab.txt b/models/luc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5e32a616b34380ce55a459794a71c1552215a571 --- /dev/null +++ b/models/luc/vocab.txt @@ -0,0 +1,59 @@ +6 +n +e +ã +a +ô +' +s +ê +p +î +3 +â +i +ộ +_ +- +í +́ + +2 +4 +á +ó +ŋ +ĩ +ụ +o +v +h +ọ +õ +l +ị +u +ẽ +w +ẹ +ệ +y +t +f +û +m +0 +j +ú +k +z +c +d +é +ũ +1 +̂ +g +b +r +̃ diff --git a/models/lug/G_100000.pth b/models/lug/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2164df95e3587d3c82b5e1f3d73f2c198bc90c6e --- /dev/null +++ b/models/lug/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f31b510a33bbe33f924e8260a4f4de1f693032ada61e027ddd5889e7345bb1 +size 145478131 diff --git a/models/lug/config.json b/models/lug/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lug/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lug/vocab.txt b/models/lug/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..3e8ec68774d849384a39939d61e41844f1154450 --- /dev/null +++ b/models/lug/vocab.txt @@ -0,0 +1,30 @@ +z +e +d +f +p +c +4 +j +s +g +r +- +n +w +a +b +k +y +_ +t +o +ŋ + +' +l +h +v +i +u +m diff --git a/models/lwo/G_100000.pth b/models/lwo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..05089f15b145e351faea4a92ad085c9f9fe0d206 --- /dev/null +++ b/models/lwo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb342369ff4d5b893f71c0f6da498cc20dd9dcb5e528fd4b68cea957ce5f2e2 +size 145476869 diff --git a/models/lwo/config.json b/models/lwo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lwo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lwo/vocab.txt b/models/lwo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..118bdd613e137a72c7c48a958fdd1e387816ed38 --- /dev/null +++ b/models/lwo/vocab.txt @@ -0,0 +1,28 @@ +| +a +e +n +h +o +g +i +u +y +d +k +r +m +w +b +j +c +t +l +p +' +q +0 +s +1 +5 + diff --git a/models/lww/G_100000.pth b/models/lww/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f43902a05447bc0ae3390e3a708125d5720412a6 --- /dev/null +++ b/models/lww/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292a6b74e83c2c6b7a5a5b68bd88a5dda284e0d998c90e946dad330fca51df63 +size 145480705 diff --git a/models/lww/config.json b/models/lww/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lww/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lww/vocab.txt b/models/lww/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d9e6cae0bae1851e54cc812f08df173efc5b86fb --- /dev/null +++ b/models/lww/vocab.txt @@ -0,0 +1,33 @@ +8 +4 +g +a +y +k +x +2 +w +- +̃ +1 +s +0 +i +3 +r +n +v +' + +_ +o +6 +e +7 +5 +p +l +u +m +t +9 diff --git a/models/lzz/G_100000.pth b/models/lzz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..830113af94188ba99d2cd911f862dd4a09337d10 --- /dev/null +++ b/models/lzz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bd0ebb34b49bc3c9ba65a43a79616fd574c99088dc023042dade864c79fbb3b +size 145481465 diff --git a/models/lzz/config.json b/models/lzz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/lzz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/lzz/vocab.txt b/models/lzz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..008799599bae4bfcf8090ac1f95a4c3982ab620d --- /dev/null +++ b/models/lzz/vocab.txt @@ -0,0 +1,34 @@ +d +n +ğ +k +̇ +' +ş +ç +z +̆ +b +a +y +_ +s +m +e +x +u +o +f +i +p +t +h +r +c +g +j +v +l + +ʒ +- diff --git a/models/maa-dialect_sanantonio/G_100000.pth b/models/maa-dialect_sanantonio/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..408fd30145c239b439150e0dccb7fb6ea4d38d35 --- /dev/null +++ b/models/maa-dialect_sanantonio/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f9af840a897d052bf5ca8a054851ed6e8db91da53bf493b21f6505e54dee4d +size 145487733 diff --git a/models/maa-dialect_sanantonio/config.json b/models/maa-dialect_sanantonio/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/maa-dialect_sanantonio/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/maa-dialect_sanantonio/vocab.txt b/models/maa-dialect_sanantonio/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cec86094b5809d394975c3062fb83f30f1453eb7 --- /dev/null +++ b/models/maa-dialect_sanantonio/vocab.txt @@ -0,0 +1,42 @@ +| +a +n +̱ +i +j +t +k +s +o +' +‐ +í +á +g +r +e +l +m +ì +x +h +c +d +y +é +ò +è +à +ñ +ó +b +u +f +ú +p +- +v +z +q +ù + diff --git "a/models/maa-dialect_sanjer\303\263nimo/G_100000.pth" "b/models/maa-dialect_sanjer\303\263nimo/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..5c01c11be7f68ec4340de6d6e5456f9f289f8432 --- /dev/null +++ "b/models/maa-dialect_sanjer\303\263nimo/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5924c40a96dff7609b19e42f389d1ee461608c3f6404d79a44d3022528537344 +size 145486845 diff --git "a/models/maa-dialect_sanjer\303\263nimo/config.json" "b/models/maa-dialect_sanjer\303\263nimo/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/maa-dialect_sanjer\303\263nimo/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/maa-dialect_sanjer\303\263nimo/vocab.txt" "b/models/maa-dialect_sanjer\303\263nimo/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..b4b68d48ce61253c17ac450850b154a1ccd4cf10 --- /dev/null +++ "b/models/maa-dialect_sanjer\303\263nimo/vocab.txt" @@ -0,0 +1,41 @@ +| +a +n +̱ +i +j +k +t +o +s +' +‐ +e +x +g +í +ì +á +à +l +ò +m +h +c +d +è +y +é +ñ +r +ó +b +u +ú +f +p +z +v +q +- + diff --git a/models/mad/G_100000.pth b/models/mad/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c424a8fb34392a15379763599a1e313593c47f9c --- /dev/null +++ b/models/mad/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3959f13ca6299de304d674a01b9720396c87e37fa054647adb8939e9c20bda34 +size 145481447 diff --git a/models/mad/config.json b/models/mad/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mad/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mad/vocab.txt b/models/mad/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..af7e7ebaf4bdde0c0bf79d8caae99e66bfe2904a --- /dev/null +++ b/models/mad/vocab.txt @@ -0,0 +1,34 @@ +a +| +n +e +s +g +r +o +k +l +b +d +t +p +i +m +' +h +u +y +j +- +c +w +f +z +â +– +ä +0 +3 +ë +2 + diff --git a/models/mag/G_100000.pth b/models/mag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a660b140accccc5ed13cf18310082e849161d093 --- /dev/null +++ b/models/mag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786e4f7fdeec3afbf8bdfabbd10e1ed32e6143099df0a62695c2f73dd9b9022e +size 145508347 diff --git a/models/mag/config.json b/models/mag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mag/vocab.txt b/models/mag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f9c819b32ffbc9d7fc1a0fea288887d96ecdbc40 --- /dev/null +++ b/models/mag/vocab.txt @@ -0,0 +1,69 @@ +आ +े +ड +ञ +– +स +म +च +प +ब +ी +घ +ँ +औ +ॅ +ट +ख +छ +ण +ि +ऽ +ʻ +2 +् +भ +र +़ +ल +ै +ह +ओ +ं +झ +द +ʼ +_ +ो +ऐ +ढ +ज +अ +è +‍ +ा +व +उ +ु +ष +ऩ +à +ध +ग +त +क +ौ +ई +ए +ठ +य +न +श +ऱ +ऊ +ू +फ +इ +- + +थ diff --git a/models/mah/G_100000.pth b/models/mah/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d35800c7d6fd6eeef50f49417f8557413e4dbcd --- /dev/null +++ b/models/mah/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb54ae535d0664c2e94edde95baa7d47ab3f32d6cf2e6a38284b428136d238c +size 145484527 diff --git a/models/mah/config.json b/models/mah/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mah/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mah/vocab.txt b/models/mah/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..527e6b9fbabb2f49608655ad54ecc730ed459639 --- /dev/null +++ b/models/mah/vocab.txt @@ -0,0 +1,38 @@ +| +e +a +i +n +k +j +o +r +m +ō +l +t +w +b +ṃ +ñ +ā +p +ḷ +u +ọ +d +s +ṇ +ū +y +g +v +z +h +f +' +- +x +c +q + diff --git a/models/mai/G_100000.pth b/models/mai/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a711a05559f4bbc52df54fcb8dc17f3ff10728b8 --- /dev/null +++ b/models/mai/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04acfc9946d9965cbde54ba06742fa21788d231c403d78d700b177719f5a4e97 +size 145506061 diff --git a/models/mai/config.json b/models/mai/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mai/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mai/vocab.txt b/models/mai/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3e4cf7214b4e631e9b2686ca6de56a55e44b9c1f --- /dev/null +++ b/models/mai/vocab.txt @@ -0,0 +1,66 @@ +| +ा +क +र +ि +ह +े +स +न +् +त +ल +म +प +ँ +‍ +भ +ज +ु +अ +य +ब +ी +द +व +छ +ो +श +ग +आ +ओ +ै +ऽ +ू +ख +थ +ए +च +ध +ट +ष +- +ड +़ +ण +इ +ं +उ +औ +ई +ौ +ठ +फ +ृ +घ +ढ +झ +ञ +— +ऊ +ऐ +ः +' +ङ +ऋ + diff --git a/models/maj/G_100000.pth b/models/maj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..debb4bd167cd9a33c6fcdc1fbaf270efe25c10c9 --- /dev/null +++ b/models/maj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6192bf7e63cbd95ad6d1c2ba6c37323e6ab3cdb84da05809fe54068d912a9407 +size 145482997 diff --git a/models/maj/config.json b/models/maj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/maj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/maj/vocab.txt b/models/maj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2b09dee6e2d47c116fe68dac3ca4716f937d0333 --- /dev/null +++ b/models/maj/vocab.txt @@ -0,0 +1,36 @@ + +u +m +r +p +s +o +v +l +— +y +j +h +g +ú +n +i +e +' +a +k +x +z +c +_ +d +í +á +f +ó +é +̱ +ṉ +t +ñ +b diff --git a/models/mak/G_100000.pth b/models/mak/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbb51f5c1dae61e58149e0157f8014727fdf55d3 --- /dev/null +++ b/models/mak/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd45478b291aca5a0560d5994f01c75787b429c1015075f86a7c96e4bc30b898 +size 145479143 diff --git a/models/mak/config.json b/models/mak/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mak/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mak/vocab.txt b/models/mak/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..79f6ee1ab2c0ff959b2c924fe6451fcf858659ec --- /dev/null +++ b/models/mak/vocab.txt @@ -0,0 +1,31 @@ +a +| +n +i +g +k +m +l +t +u +e +r +s +' +o +p +b +j +y +- +d +c +h +w +f +z +0 +4 +1 +2 + diff --git a/models/mal/G_100000.pth b/models/mal/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4da70c6c61e54856264aad277c1352635872ed8f --- /dev/null +++ b/models/mal/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1836c912d472189f7b09d061c2835f708d6ec20ddffc53b34dd3518b056cacb8 +size 145519959 diff --git a/models/mal/config.json b/models/mal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mal/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mal/vocab.txt b/models/mal/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..01c840400688a5696f66e6de0458f3798d001e96 --- /dev/null +++ b/models/mal/vocab.txt @@ -0,0 +1,84 @@ +— +ത +ര +ള +ഴ +ഒ +0 +ൂ +ഖ +ഞ +ഇ +ഡ +ി +ഊ +ച +ാ +യ +ഷ +i +ഉ +ഫ +ല +ഠ +4 +െ +ൗ +ആ +സ +ു +ം +m +ജ +ധ +- +ഹ +ഛ +് +ട +ഃ +ഗ +ണ +ൽ +ോ +_ +റ +ഏ +6 +എ +ർ +ഥ +ഘ +ഓ +ീ +ൈ +2 +േ +ക +1 +ൾ +ങ +ഭ + +ഢ +അ +മ +ബ +3 +ഈ +ഔ +പ +ൺ +ന +ൊ +o +ൃ +c +വ +ൻ +ദ +5 +ഐ +' +q +ശ diff --git a/models/mam-dialect_central/G_100000.pth b/models/mam-dialect_central/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9974254a9c05908e5d835a923f4c8a3bc0ebe87 --- /dev/null +++ b/models/mam-dialect_central/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f677abbd4030d7f7c789bd79934e43fc1e655ab9ba799cf002b065180dc1a9c +size 145489123 diff --git a/models/mam-dialect_central/config.json b/models/mam-dialect_central/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mam-dialect_central/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mam-dialect_central/vocab.txt b/models/mam-dialect_central/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..45961075f7d4ad8a17963dfd050ec4b333b0b3bd --- /dev/null +++ b/models/mam-dialect_central/vocab.txt @@ -0,0 +1,44 @@ +| +t +' +i +n +a +j +e +k +l +y +u +o +x +q +b +m +z +w +s +c +h +p +d +r +- +ú +g +ẍ +é +í +f +v +á +ó +0 +6 +4 +1 +5 +2 +3 +ñ + diff --git a/models/mam-dialect_northern/G_100000.pth b/models/mam-dialect_northern/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9df089c5b46d00c4bdc42e63b408f565ea54be47 --- /dev/null +++ b/models/mam-dialect_northern/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac8580fcaff09ab0925830cfd619d1b50e5e48a3bb5b246f67846c7fd1be120c +size 145492179 diff --git a/models/mam-dialect_northern/config.json b/models/mam-dialect_northern/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mam-dialect_northern/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mam-dialect_northern/vocab.txt b/models/mam-dialect_northern/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..375bfc9eadf099d23c9793b318d69339c55121cf --- /dev/null +++ b/models/mam-dialect_northern/vocab.txt @@ -0,0 +1,48 @@ +| +t +a +n +e +i +ꞌ +c +j +u +l +y +x +o +k +m +b +z +s +w +p +h +r +d +q +- +ú +— +g +ẍ +é +ó +v +í +á +f +ñ +0 +4 +2 +3 +9 +5 +6 +7 +8 +1 + diff --git a/models/mam-dialect_southern/G_100000.pth b/models/mam-dialect_southern/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f97f6a50fdc8f29dd505f9aa1bdc102a76b0644a --- /dev/null +++ b/models/mam-dialect_southern/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ca438b092e7cdfd181fbc00e696cb790bfb21e7f7fb23f4543422366c9ccbc +size 145479947 diff --git a/models/mam-dialect_southern/config.json b/models/mam-dialect_southern/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mam-dialect_southern/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mam-dialect_southern/vocab.txt b/models/mam-dialect_southern/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3c76217841f35d86482c4e35a02d1e29117a3fc2 --- /dev/null +++ b/models/mam-dialect_southern/vocab.txt @@ -0,0 +1,32 @@ +| +' +t +a +n +e +i +j +u +k +l +x +y +o +q +b +m +z +s +w +c +h +p +d +r +- +— +ẍ +g +ú +́ + diff --git a/models/mam-dialect_western/G_100000.pth b/models/mam-dialect_western/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6f9cea81212627057b085e17ea79a4abac2e92f --- /dev/null +++ b/models/mam-dialect_western/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde8dda8483b5a47b0f94b14dc5046d9eccf475b161ddd7198186ba95703bfdb +size 145485311 diff --git a/models/mam-dialect_western/config.json b/models/mam-dialect_western/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mam-dialect_western/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mam-dialect_western/vocab.txt b/models/mam-dialect_western/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6d6cdea952763424c91accbfb227d817aa182c6f --- /dev/null +++ b/models/mam-dialect_western/vocab.txt @@ -0,0 +1,39 @@ +| +t +a +n +i +e +' +j +c +l +x +y +u +o +k +b +s +z +m +w +p +d +r +h +- +̈ +ú +q +— +í +g +é +á +ẍ +ó +f +v +ñ + diff --git a/models/maq/G_100000.pth b/models/maq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b1392c4cd4bfe355e69e2fae928dea434c288e9 --- /dev/null +++ b/models/maq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5742416fc387b80da6f3e2bdd328a99a3695d59ea9d163de101b669901e1dc80 +size 145482237 diff --git a/models/maq/config.json b/models/maq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/maq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/maq/vocab.txt b/models/maq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c517e573f567ad4bd522d9d7215f09eabb7ce830 --- /dev/null +++ b/models/maq/vocab.txt @@ -0,0 +1,35 @@ +d +_ +í +v +t +b +h +g +y +́ +z +ó +l + +p +n +s +é +ú +o +e +m +a +u +ñ +i +c +á +x +ë +r +j +‍ +q +f diff --git a/models/mar/G_100000.pth b/models/mar/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..71682c78f7dbaaade0a2b0cacf8ce6d9c322a890 --- /dev/null +++ b/models/mar/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf63dbba87cfc9d9133b59b6811781c2973f39a87e301a203176a3a8706ab1f0 +size 145511431 diff --git a/models/mar/config.json b/models/mar/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mar/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mar/vocab.txt b/models/mar/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..bc36bbec8bce5faf235e5fdfbcadc0a24c81fda6 --- /dev/null +++ b/models/mar/vocab.txt @@ -0,0 +1,73 @@ +ङ +ह +आ +ब +ढ +ख +2 +ं +त +ट +_ +स +ऊ +1 +क +औ +ळ +च +ओ +- +ई +श +ज +ृ +ग +झ +ʈ +ऐ +द + +ड +ए +ै +ौ +9 +7 +ठ +ऱ +व +ो +ऋ +0 +4 +य +6 +ः +ल +ध +प +ञ +फ +उ +न +ष +म +छ +े +ि +ण +' +अ +इ +ा +ु +ू +ʇ +ऴ +घ +् +र +भ +ी +थ diff --git a/models/maw/G_100000.pth b/models/maw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43552633e7491eaa1072b515467aebe66df935fe --- /dev/null +++ b/models/maw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635fe9a803ceb2d821dc55f56a8dde7078db9415fee16489d7433b6d5771b9ed +size 145478353 diff --git a/models/maw/config.json b/models/maw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/maw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/maw/vocab.txt b/models/maw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77a87432389d17060ed28dcd2c59314c3fde7a04 --- /dev/null +++ b/models/maw/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +n +u +m +y +b +s +l +k +d +ŋ +t +r +e +ↄ +ɛ +g +- +o +z +p +w +' +h +f +v +ԑ + diff --git a/models/maz/G_100000.pth b/models/maz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3d2740d3e5a6b2d680b5bf70a210d075a225fdd --- /dev/null +++ b/models/maz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8db3c66b47f97bf6762be280e58df61174a55d19f3d293d2782f1fdf5a6f2b3 +size 145489901 diff --git a/models/maz/config.json b/models/maz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/maz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/maz/vocab.txt b/models/maz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5ab97ab520e90ac11c07cedaba015f7c3cf6de2c --- /dev/null +++ b/models/maz/vocab.txt @@ -0,0 +1,45 @@ +| +a +j +i +e +c +' +o +n +m +u +r +ü +t +s +g +d +y +p +z +ö +b +x +h +ñ +í +̱ +ä +ë +v +ã +l +á +ũ +ó +ṉ +õ +ẽ +ú +f +q +é +ĩ +k + diff --git a/models/mbb/G_100000.pth b/models/mbb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c6ef3c8e330e793c6c1602e85c67d44699f5c6ed --- /dev/null +++ b/models/mbb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:009ce03cd3cf5bea941f7db94938546c286e668c5e2d23d74a9714d720c254e1 +size 145482242 diff --git a/models/mbb/config.json b/models/mbb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbb/vocab.txt b/models/mbb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..04ea46b0e8fcbc626ea1d83f83ecc9963bbfc3f3 --- /dev/null +++ b/models/mbb/vocab.txt @@ -0,0 +1,35 @@ +| +e +n +a +i +u +t +k +s +m +d +h +g +y +l +w +z +p +b +à +v +ǥ +r +- +è +ù +j +ì +o +' +c +f +x +7 + diff --git a/models/mbc/G_100000.pth b/models/mbc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d885553f07851e9ca4d1a77dc3e85f2ea6c0cdac --- /dev/null +++ b/models/mbc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c211f5151ff584d4a8d5564d78aa445d026d5ed2d8147d2d6a8142639ae901 +size 145496822 diff --git a/models/mbc/config.json b/models/mbc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbc/vocab.txt b/models/mbc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..adb93602fef667f6d449db7f808a7193a486477e --- /dev/null +++ b/models/mbc/vocab.txt @@ -0,0 +1,54 @@ +- +k +é +0 +w +h +9 +y +ê +j +z +i +v +â +l +b +p +m +s +í +3 +˼ +5 +_ +t +d +g +c +ô +— +õ +1 +6 +' +n +á +2 + +r +x +a +u +ã +˻ +î +7 +ó +8 +e +4 +f +q +ú +o diff --git a/models/mbh/G_100000.pth b/models/mbh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..78a5b551183c46c62cbe555eef4af46ce6c0d29b --- /dev/null +++ b/models/mbh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e58a301e8782745cae5c17d1293368fa4509c8c4948793f585a5932e1f97f6 +size 145482993 diff --git a/models/mbh/config.json b/models/mbh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbh/vocab.txt b/models/mbh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f670b718c8e7921566373915e727d2ace6abc731 --- /dev/null +++ b/models/mbh/vocab.txt @@ -0,0 +1,36 @@ +| +a +o +e +n +i +g +l +r +m +t +k +u +v +p +s +h +d +w +j +b +0 +1 +2 +' +3 +5 +4 +6 +8 +9 +7 +- +y +f + diff --git a/models/mbj/G_100000.pth b/models/mbj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f8fbc3a0a6f34db1c7bc6559c53635e52e8a23c7 --- /dev/null +++ b/models/mbj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f97f396ec2049fade91b6ac8eba61c532cf0106833dd5eccb832b6138321b98a +size 145491539 diff --git a/models/mbj/config.json b/models/mbj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbj/vocab.txt b/models/mbj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d5d6c17b4d1fa5f2563feae3f9a10ee0584262a --- /dev/null +++ b/models/mbj/vocab.txt @@ -0,0 +1,47 @@ +o +t +s +d +p +0 +9 +y +b +k +̃ +ũ +g +ó +u +h +e + +w +i +é +ä +4 +3 +— +- +j +ã +ë +l +5 +m +6 +a +ẽ +2 +n +r +_ +8 +7 +1 +ǟ +õ +ỹ +ĩ +' diff --git a/models/mbt/G_100000.pth b/models/mbt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..59d263582e15e896eb2f9a67c9f8ffd6ef8b8d2e --- /dev/null +++ b/models/mbt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008360d9af1afbd564069e2856c7f558c43d5fd4555c3183b4923c6a7fa6cb24 +size 145478393 diff --git a/models/mbt/config.json b/models/mbt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbt/vocab.txt b/models/mbt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4ba00f9ae63c6e166def4f48a9cbd5602c459eb --- /dev/null +++ b/models/mbt/vocab.txt @@ -0,0 +1,30 @@ +| +e +a +n +i +u +k +t +g +m +s +y +d +l +p +w +r +h +b +- +— +' +0 +6 +2 +3 +1 +5 +4 + diff --git a/models/mbu/G_100000.pth b/models/mbu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26da5d471e748b67821a3f951b88f3169d2ac81a --- /dev/null +++ b/models/mbu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b645f0c19fb5b013c75dd755576c24cde45ca831b3f3b024d90a6420fe868bd +size 145502183 diff --git a/models/mbu/config.json b/models/mbu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbu/vocab.txt b/models/mbu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..26ee7eac61ad8adbd26a3dd1d804ca7230d03c44 --- /dev/null +++ b/models/mbu/vocab.txt @@ -0,0 +1,61 @@ +ě +ǝ +ê +n +é +ú +c +y +h +v +2 +́ +g + +d +p +â +û +- +l +k +á +í +t +ì +ū +i +î +ô +̀ +ó +è +r +s +a +_ +z +0 +b +ŋ +ǎ +ɓ +1 +ā +j +ò +f +6 +ɗ +w +ǔ +e +m +ǒ +u +4 +o +à +' +ə +ù diff --git a/models/mbz/G_100000.pth b/models/mbz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..34662ebf51439f9b445723c40fef66a947dcb260 --- /dev/null +++ b/models/mbz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a915cf46eacd81aa35e106dd9a99cd840cd8fd3ee03de929e944986da4ffd142 +size 145481563 diff --git a/models/mbz/config.json b/models/mbz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mbz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mbz/vocab.txt b/models/mbz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8f705c508e67a4bb044a5ec8d4e90bb127481223 --- /dev/null +++ b/models/mbz/vocab.txt @@ -0,0 +1,34 @@ +| +a +n +i +u +t +k +ꞌ +- +o +r +y +c +h +s +ɨ +v +e +ñ +w +j +m +x +g +— +l +p +í +d +f +b +é +ó + diff --git a/models/mca/G_100000.pth b/models/mca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7894c2ef532eebea1d3ede421960941110862371 --- /dev/null +++ b/models/mca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81ddb0b69359d18e2d90eac88985ffc13c57cb25d4d3e750e01dfdd0129e5944 +size 145488369 diff --git a/models/mca/config.json b/models/mca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mca/vocab.txt b/models/mca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f6c919e4e1bdc77510cae6124a2ba7c842931942 --- /dev/null +++ b/models/mca/vocab.txt @@ -0,0 +1,43 @@ +| +i +e +a +ˈ +t +n +j +' +k +q +s +h +u +ƚ +y +p +w +m +l +o +x +f +r +— +d +c +ú +b +g +í +é +ó +z +á +v +- +ñ +1 +4 +0 +3 + diff --git a/models/mcb/G_100000.pth b/models/mcb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..694e2c9f842e8e496b5aef2724a59568f7dbcba1 --- /dev/null +++ b/models/mcb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6519cd28f711c67f7a052e9846dfbe9e503d21625bf05cceb342f69864f10c76 +size 145480681 diff --git a/models/mcb/config.json b/models/mcb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mcb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mcb/vocab.txt b/models/mcb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6a6f818397e8edd31ba99a8b5d72d25852f14952 --- /dev/null +++ b/models/mcb/vocab.txt @@ -0,0 +1,33 @@ +a +i +| +r +t +k +n +e +o +g +s +m +p +v +y +h +u +ñ +c +j +— +0 +1 +' +2 +4 +7 +5 +3 +6 +8 +9 + diff --git a/models/mcd/G_100000.pth b/models/mcd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b9aa6e829f5a8bc7f1b7b85bb457b6e4619639ee --- /dev/null +++ b/models/mcd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a724e4ab8175f47c18de1fe86689bfe8be5f9db87aaeb6c3508c47208d9472 +size 145486841 diff --git a/models/mcd/config.json b/models/mcd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mcd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mcd/vocab.txt b/models/mcd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c45f7f4a4799f9f924573d279374786b65bcd749 --- /dev/null +++ b/models/mcd/vocab.txt @@ -0,0 +1,41 @@ +6 +c +z + +4 +e +f +o +r +v +i +ú +2 +k +b +é +u +_ +ó +' +a +d +m +á +t +q +7 +j +1 +0 +n +s +y +h +p +— +l +x +g +í +ñ diff --git a/models/mco/G_100000.pth b/models/mco/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2cc26357293c2d5e753ced527fbc82c9133d36b1 --- /dev/null +++ b/models/mco/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64665a876ec179042955ebd7d5d7ef3f32164716612b3115adaac0b671f9b83f +size 145488379 diff --git a/models/mco/config.json b/models/mco/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mco/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mco/vocab.txt b/models/mco/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ffb17e3934c1aa54c3689ee334dabbb36dad95cc --- /dev/null +++ b/models/mco/vocab.txt @@ -0,0 +1,43 @@ +x +d +b +z +ë +l +ú +ñ +a +u +ó +ä +̈ +ö +ï +f +p +é +c +e +- +q +n +_ +ü +r +y +i +m +g +o +v +í +t +' +́ +á +ɨ +s +h + +k +j diff --git a/models/mcp/G_100000.pth b/models/mcp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..174fb1e765b1642e8e506f7e08dd0a0b25412592 --- /dev/null +++ b/models/mcp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3eb805a4f4a22bcf47dcda4a7cc789dc8d80e36e8ae81f2c632d46f8e26dcd0 +size 145494519 diff --git a/models/mcp/config.json b/models/mcp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mcp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mcp/vocab.txt b/models/mcp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7d4c39a2ec3db974f33bbe33adf9a7d3be6f87ec --- /dev/null +++ b/models/mcp/vocab.txt @@ -0,0 +1,51 @@ +j +o +u +t +î +ʉ +á +g +r +k +ɛ +̧ +é +ú +û +ê +ǒ +ə +ǎ +l +_ +f +h +ɔ +̌ +b +p +ɨ +̂ +c +ô +- +â +z +i +ŋ +s + +w +a +d +í +ǐ +v +́ +y +ó +n +e +ǔ +m diff --git a/models/mcq/G_100000.pth b/models/mcq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dabc2ca30268cf255f4555b161d97e0e5112f5c4 --- /dev/null +++ b/models/mcq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced1400f517ab412b566e5a59b994b6eed8ab45924bd5fb0b9fa7a4b1aa8c7b5 +size 145481461 diff --git a/models/mcq/config.json b/models/mcq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mcq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mcq/vocab.txt b/models/mcq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..626377903b34c2d4d72c549b725b2ba8fe40c060 --- /dev/null +++ b/models/mcq/vocab.txt @@ -0,0 +1,34 @@ +p +d +j +é +á +o +r +' +t + +g +5 +n +e +h +7 +i +k +ú +0 +- +a +m +w +2 +v +1 +3 +s +u +í +c +_ +6 diff --git a/models/mcu/G_100000.pth b/models/mcu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e760ef43b0dd3a35fdbc5757cb3894499eb1d23f --- /dev/null +++ b/models/mcu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50cda9a34701c51ee7ece940299d0200b62647b01cd9ef4f66fb59499b046850 +size 145500021 diff --git a/models/mcu/config.json b/models/mcu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mcu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mcu/vocab.txt b/models/mcu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6a755af222dd2f7039bf9798915a76219efa0bef --- /dev/null +++ b/models/mcu/vocab.txt @@ -0,0 +1,58 @@ +| +n +é +e +ɔ +b +m +a +è +d +l +s +à +y +t +g +k +ŋ +h +i +́ +á +r +u +ò +̀ +í +w +c +o +â +j +ù +ú +f +ê +ì +ó +v +ô +î +̂ +- +` +p +û +1 +2 +3 +5 +4 +' +7 +6 +9 +8 +0 + diff --git a/models/mda/G_100000.pth b/models/mda/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..814b3396f03da09c315e0efb4628f8b45de2a129 --- /dev/null +++ b/models/mda/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0930cdb7f02bbd845e5adf51651e41a970a5f94adcea5bcf80c886b31935bb12 +size 145498361 diff --git a/models/mda/config.json b/models/mda/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mda/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mda/vocab.txt b/models/mda/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ca351e11fac9ac750a90efed44deaf9498b44bf5 --- /dev/null +++ b/models/mda/vocab.txt @@ -0,0 +1,56 @@ +| +ə +n +̄ +y +g +m +k +ē +s +̀ +r +b +a +d +ā +t +w +l +u +ɛ +e +à +ɔ +ì +i +p +ū +o +z +ī +è +h +ù +j +c +̆ +v +f +̂ +ō +ò +ŭ +ĕ +ă +â +- +ĭ +ê +û +ô +î +é +ŏ +` + diff --git a/models/mdv/G_100000.pth b/models/mdv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0337dabea5312afe89f94441a213e69de66b5118 --- /dev/null +++ b/models/mdv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd64030c550ac035547d539e3e87fd55a11bee83d7ecd1002c58ce44410a71c8 +size 145484529 diff --git a/models/mdv/config.json b/models/mdv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mdv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mdv/vocab.txt b/models/mdv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..04ea7e01a2cf0b95a725b419ef4e7e30a5cf9ffa --- /dev/null +++ b/models/mdv/vocab.txt @@ -0,0 +1,38 @@ +— +j +r +o +̱ +_ +k +x +ñ +n +v +c +a +í +b +m +ú +f +e +g +l +z + +́ +s +t +y +d +p +ó +ɨ +h +ꞌ +- +á +i +é +u diff --git a/models/mdy/G_100000.pth b/models/mdy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c2a587f63e6f470f6dc18d10e5d72a89005f785 --- /dev/null +++ b/models/mdy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ee92b339875edca031dff278712b54be9cb2f2162a8fb825d483dcf3dd7bff +size 145572956 diff --git a/models/mdy/config.json b/models/mdy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mdy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mdy/vocab.txt b/models/mdy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..99302b7a3640e835147ed96e4a00d782f1167486 --- /dev/null +++ b/models/mdy/vocab.txt @@ -0,0 +1,153 @@ +| +ን +ዓ +ሢ +ና +ዒ +ማ +ያ +ይ +ኣ +ዳ +ታ +ዔ +ኔ +ኮ +ቴ +ዛ +ጌ +ሃ +ኬ +ር +ካ +ሲ +ፆ +ባ +ሴ +ዬ +ቶ +ም +ጎ +ጋ +ላ +ዋ +ኤ +ዎ +ኦ +ፓ +ጉ +ዖ +ሮ +ኪ +ጊ +ዲ +ኒ +ሳ +ዴ +ዚ +ፔ +ሣ +ሙ +ቢ +ዻ +ሎ +ኑ +ሪ +ስ +ሶ +ሊ +ሱ +ሺ +ቃ +ቲ +ዶ +ራ +ሚ +ሞ +ቂ +ዞ +ቤ +ሜ +ኢ +ፃ +ኡ +ቄ +ሌ +ዼ +ዑ +ቱ +ዺ +ፒ +ሄ +ፖ +ጮ +ዜ +ጫ +ኖ +ሾ +ቦ +ዱ +ሽ +ሂ +ሼ +ሻ +ኩ +ፂ +ዦ +ል +ው +ዮ +ፑ +ቆ +ሹ +ዾ +ቡ +ሁ +ጴ +ጳ +ቺ +ጪ +ሬ +ቾ +ሩ +ሦ +ዹ +ጲ +ቻ +ቁ +ሉ +ጶ +ፄ +ጬ +ሤ +ዢ +ዙ +ዣ +ብ +ዪ +ዉ +ሡ +ጩ +ዤ +ዌ +ሆ +ሥ +ጂ +ቹ +ቼ +ዊ +ጆ +ፕ +ፁ +ግ +ጄ +ጃ +ዩ +ጱ +ዥ +ክ +ዡ +ዝ +ጁ +ት + diff --git a/models/med/G_100000.pth b/models/med/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..60432e9188329509742da42eeaa4708e88f6002b --- /dev/null +++ b/models/med/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1db695495977e3a3e54a6a90e3af2862db22cc36346f70a0b92ed3062617014 +size 145482987 diff --git a/models/med/config.json b/models/med/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/med/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/med/vocab.txt b/models/med/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2fd8a47c2068d111787accc53a1c902d304339f8 --- /dev/null +++ b/models/med/vocab.txt @@ -0,0 +1,36 @@ +4 +r +o +- +l +t +8 +2 +7 +u +d +e +m +y +w +_ +g +i +b +6 +ʉ +k +0 +a +ⱡ +' +5 +s +ᵾ +n +1 +p +9 +j +3 + diff --git a/models/mee/G_100000.pth b/models/mee/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c075fbae96d1d6ea1c048331540d07c7c1d7875 --- /dev/null +++ b/models/mee/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:843f14d9325edc14a39d31e9ff3f5e1d05415a12a2c83b8cdd64c174ec28adee +size 145474525 diff --git a/models/mee/config.json b/models/mee/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mee/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mee/vocab.txt b/models/mee/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..610fae1bb7126168ec2d0f25f4b64cc82612fb1b --- /dev/null +++ b/models/mee/vocab.txt @@ -0,0 +1,25 @@ +5 +p +m +2 +0 +g +e +' +s +_ +n +i +l +t +— +a +b +u +r +o +3 +4 +v + +k diff --git a/models/mej/G_100000.pth b/models/mej/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..83da2826e01a5711c3e997d8a4709162b5d370c9 --- /dev/null +++ b/models/mej/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29bfc784f98e10911c1c1b9cf11631fc8830ef1ebfab53ab0e46bb87b6d7921b +size 145479821 diff --git a/models/mej/config.json b/models/mej/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mej/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mej/vocab.txt b/models/mej/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b7fef77267f3c1a31f6eb3ba6b11af2f7bcdf26d --- /dev/null +++ b/models/mej/vocab.txt @@ -0,0 +1,32 @@ +w +' +1 +f +t +4 +d +z +2 +k +l +r +p +s +b +i +h +_ +o +j +g +6 +a + +u +y +c +e +n +0 +m +- diff --git a/models/men/G_100000.pth b/models/men/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..82cbd2f26510bb0ea8e1cf351b36ed01f2f34f2c --- /dev/null +++ b/models/men/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab7e59aa94b3dfe12e5d4992c5cb4e586febd8603a15abcbe0f335514e9f1297 +size 145479125 diff --git a/models/men/config.json b/models/men/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/men/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/men/vocab.txt b/models/men/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..28d73a40ca9f7c1f8a5389d473c9e1b6112337ef --- /dev/null +++ b/models/men/vocab.txt @@ -0,0 +1,31 @@ +| +a +i +n +e +l +g +ɛ +u +ɔ +m +w +y +t +k +o +b +h +s +p +d +j +v +f +r +- +ŋ +z +' +c + diff --git a/models/meq/G_100000.pth b/models/meq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d6c0f19b0edb715f9e68b5f4d8ddc2dad3f6671 --- /dev/null +++ b/models/meq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16c803aa028a7b144f039d61c5b28282fe4e61dbf51ae6953492445590a1cbf +size 145483759 diff --git a/models/meq/config.json b/models/meq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/meq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/meq/vocab.txt b/models/meq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ec92933a772f4c37f50b721187128651a9bd19b8 --- /dev/null +++ b/models/meq/vocab.txt @@ -0,0 +1,37 @@ +| +a +e +y +ə +m +n +t +k +l +h +i +d +w +r +g +s +b +u +o +z +ɗ +ŋ +f +v +p +â +ɓ +ɨ +œ +ʼ +- +0 +4 +3 +2 + diff --git a/models/met/G_100000.pth b/models/met/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..beeb9d36a9b0503a6903d687fd4016581c49d313 --- /dev/null +++ b/models/met/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b86c95853a9d4f51967f7a75fff1888790286c62e9a070dd460b2eb83504222 +size 145482973 diff --git a/models/met/config.json b/models/met/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/met/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/met/vocab.txt b/models/met/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ba4afaf53ecbba492e1e109a1c618f152466ee0d --- /dev/null +++ b/models/met/vocab.txt @@ -0,0 +1,36 @@ +1 +_ +- +w +b +p +o +k +8 +y +2 +9 +5 +h +t +g +n +d +0 +4 +r +a +7 +i +s +6 +x +l +e +u +— +3 +ʼ +' + +m diff --git a/models/mev/G_100000.pth b/models/mev/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..63eb9bf6d7bd88e4a79b87bce82849b0a57c305f --- /dev/null +++ b/models/mev/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8c43c9685a45f0eaaaef999b473797bb3d36c45e871f3807acf1c9904e06e2 +size 145493773 diff --git a/models/mev/config.json b/models/mev/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mev/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mev/vocab.txt b/models/mev/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b648dc633667d9d1d46b232132f4ff605a06bca --- /dev/null +++ b/models/mev/vocab.txt @@ -0,0 +1,50 @@ +| +ɛ +̀ +l +k +ɔ +à +́ +a +á +m +e +̃ +o +é +w +í +i +g +ì +y +è +z +p +- +ó +n +d +b +ò +s +ɓ +ĩ +t +‐ +ã +ú +ù +f +v +u +ũ +ḿ +ṹ +ƃ +h +ŋ +— +' + diff --git a/models/mfe/G_100000.pth b/models/mfe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3486530de93284aed69077d0288f8f7ca82e5b7a --- /dev/null +++ b/models/mfe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62677c4c974cd48f06fc0f8adb0c2d3004f2ef0880c6f13ee7fb99c6650ce606 +size 145479145 diff --git a/models/mfe/config.json b/models/mfe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfe/vocab.txt b/models/mfe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..72f516167fad6083ee74dbdda1e68612d70d210f --- /dev/null +++ b/models/mfe/vocab.txt @@ -0,0 +1,31 @@ +| +n +a +e +i +o +r +t +l +s +k +p +z +m +u +d +b +f +v +w +g +y +- +x +' +0 +h +1 +2 +– + diff --git a/models/mfh/G_100000.pth b/models/mfh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1237593cac0a40a295cffa7211ee63fdd58b60ec --- /dev/null +++ b/models/mfh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dabbf51eaaa8142ee985cb94f0eaee7f8eeaf4905a8c037ccc6fa7b88890235 +size 145485318 diff --git a/models/mfh/config.json b/models/mfh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfh/vocab.txt b/models/mfh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..06d7632536be1743b3e395d69e40c2589e359506 --- /dev/null +++ b/models/mfh/vocab.txt @@ -0,0 +1,39 @@ +a +| +l +à +k +ə +g +u +m +w +z +s +t +ŋ +y +n +i +h +d +o +b +e +ɗ +v +r +p +f +̀ +ɓ +' +ò +ù +ì +è +- +j +c +1 + diff --git a/models/mfi/G_100000.pth b/models/mfi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6dd5af7f6e71b5b61432cbaf0e978611e013f55c --- /dev/null +++ b/models/mfi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0408e56c31bb94ed3ca5d13b8f5c6933ba98aa8723fb2f9d2b8e9c83fc54cb6e +size 145481471 diff --git a/models/mfi/config.json b/models/mfi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfi/vocab.txt b/models/mfi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a0cf01c5467046417ec247f2feb7518c29cd31ef --- /dev/null +++ b/models/mfi/vocab.txt @@ -0,0 +1,34 @@ +a +| +e +n +á +r +i +m +k +d +t +u +s +y +w +l +b +g +h +ŋ +z +ɗ +- +v +f +p +j +c +ɓ +ƴ +í +ú +ꞌ + diff --git a/models/mfk/G_100000.pth b/models/mfk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..45134c5f3197f1ad972f70fe4be2b376a3880c4f --- /dev/null +++ b/models/mfk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18c1f9a7bd40edcacca1905dd39a3323374b461d67a87c74b1b32de07ded9ff +size 145479931 diff --git a/models/mfk/config.json b/models/mfk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfk/vocab.txt b/models/mfk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7653a62e9a96fde8c2335d696d5bf86f73ee34cf --- /dev/null +++ b/models/mfk/vocab.txt @@ -0,0 +1,32 @@ +d +m +ɗ +v +e +s +k +g +n + +l +c +a +o +_ +- +' +z +i +ä +u +h +ŋ +y +r +ə +f +t +b +p +ɓ +w diff --git a/models/mfq/G_100000.pth b/models/mfq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..051db400583f26767719bd0179955233a4c0b892 --- /dev/null +++ b/models/mfq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b19da894399c2dea850794c760425247dfffd3b1b1bdd8a7c0eee2fdc1e2360 +size 145476830 diff --git a/models/mfq/config.json b/models/mfq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfq/vocab.txt b/models/mfq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3aacd6288911574091e7ad0cab37ac9027cbbe82 --- /dev/null +++ b/models/mfq/vocab.txt @@ -0,0 +1,28 @@ +| +ɑ +n +i +k +u +b +t +l +e +m +s +y +o +ŋ +d +p +ɔ +' +w +j +f +g +c +a +- +h + diff --git a/models/mfy/G_100000.pth b/models/mfy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..27e4d20c696f9cdbb875aa570192d770c838054a --- /dev/null +++ b/models/mfy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1728b938ab90956dd60fee03e1857133418b119ec9d34b8d8fac1504324f8d0 +size 145487621 diff --git a/models/mfy/config.json b/models/mfy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfy/vocab.txt b/models/mfy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5dde52492fc4227d415e1bf00365b0a25b206630 --- /dev/null +++ b/models/mfy/vocab.txt @@ -0,0 +1,42 @@ +| +a +e +t +u +k +i +m +n +o +j +h +b +s +r +á +é +p +ä +c +í +l +y +ü +ï +ë +ú +d +ó +ö +g +‐ +— +ñ +f +v +- +z +q +' +x + diff --git a/models/mfz/G_100000.pth b/models/mfz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a6a686e7f41417bcb368194cccfe8727f8f3bb0 --- /dev/null +++ b/models/mfz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a51f99e4bd73d25faf7eb079dd214793d30eb2af65df71078259e6d5c6d68c58 +size 145479157 diff --git a/models/mfz/config.json b/models/mfz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mfz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mfz/vocab.txt b/models/mfz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ae26df3676444f25f961b1d45091458d888fdd8a --- /dev/null +++ b/models/mfz/vocab.txt @@ -0,0 +1,31 @@ +| +a +ɛ +n +i +k +e +ɔ +y +t +o +c +ŋ +m +b +l +g +w +d +u +j +r +p +ñ +ṭ +ḍ +h +' +ç +š + diff --git a/models/mgd/G_100000.pth b/models/mgd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a5b9cb072ece78828bdc665ca18f4bf4f2c85e7 --- /dev/null +++ b/models/mgd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1412b4cf13b5d3282d7db718bf4bd552192ee109adf9ea1c93e288e6e5329f8b +size 145486159 diff --git a/models/mgd/config.json b/models/mgd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mgd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mgd/vocab.txt b/models/mgd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4839089abc630b9090a4972f6615df92febf2ec5 --- /dev/null +++ b/models/mgd/vocab.txt @@ -0,0 +1,40 @@ +f +c +î + +z +i +_ +m +ù +ä +n +e +v +a +h +y +à +k +p +ò +w +ì +2 +ŋ +r +t +d +b +è +j +s +l +u +g +' +í +̀ +o +- +ṛ diff --git a/models/mge/G_100000.pth b/models/mge/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a5e1a100e9d3692020d13e49dfddf30a7c2c6a0 --- /dev/null +++ b/models/mge/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9feb0ccb69d0181ecfb46a144bb86463f672afc4d8520290f54b0a26c05f3b9f +size 145489153 diff --git a/models/mge/config.json b/models/mge/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mge/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mge/vocab.txt b/models/mge/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cf72904668fb79d6dee6c659f3f8eaf512504cf0 --- /dev/null +++ b/models/mge/vocab.txt @@ -0,0 +1,44 @@ +ꞌ +j +ò +k +̰ +m +g +ɗ +u +w +ɨ +o +à +́ +t +_ +b +ṵ +y +s +ɛ +á +e +a +l + +ɔ +r +ḭ +i +p +é +- +̀ +– +ɓ +n +ə +d +è +ù +ó +ḛ +ú diff --git a/models/mgh/G_100000.pth b/models/mgh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e474d8c21f03b11321ac67e897ec515b7f7c8147 --- /dev/null +++ b/models/mgh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07535eb658a7be6310dd67acdfaee12d1668953cd75326ed342d13a2dbac506 +size 145480699 diff --git a/models/mgh/config.json b/models/mgh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mgh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mgh/vocab.txt b/models/mgh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..76f40727651610d0f81d01767274dfbbbc84cbe4 --- /dev/null +++ b/models/mgh/vocab.txt @@ -0,0 +1,33 @@ +2 +- +p +u +m +o + +_ +k +' +r +f +c +y +e +i +1 +z +b +w +l +s +d +8 +0 +a +6 +n +j +t +h +v +g diff --git a/models/mgo/G_100000.pth b/models/mgo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..588c3ccd83dc1d7e300a7432e37c5dd23206a7c3 --- /dev/null +++ b/models/mgo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3b7b98ac57fbb73cbcdd8cd8e886cc75d01b9d7c4cad2f2de6cafae3fc1d407 +size 145494533 diff --git a/models/mgo/config.json b/models/mgo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mgo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mgo/vocab.txt b/models/mgo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..85619b99f04f19ca6a31cc9ea23cea77fc9c77f7 --- /dev/null +++ b/models/mgo/vocab.txt @@ -0,0 +1,51 @@ +l +ʼ +h +ǐ +n +ě +ŋ +g +m +ǒ +ò +̀ +ì +c +ô +ǔ +u +v +ù +f +k +z +ə +̂ +ɨ +ǎ +à +â +ê + +a +e +r +y +d +i +- +b +_ +̌ +j +û +o +p +' +è +î +s +ɔ +w +t diff --git a/models/mhi/G_100000.pth b/models/mhi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a71cd01ab8f7f192510f2f902927777992995219 --- /dev/null +++ b/models/mhi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52a4cf1c67948da5f4ea049436283d17041786a7779fad78620efbde889ce93 +size 145477713 diff --git a/models/mhi/config.json b/models/mhi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mhi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mhi/vocab.txt b/models/mhi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..68b2365993fd48f68586211c23d4e42bdb72056d --- /dev/null +++ b/models/mhi/vocab.txt @@ -0,0 +1,29 @@ +| +i +a +r +o +n +u +e +d +k +l +s +b +' +t +m +y +g +j +z +v +p +w +c +h +f +- +— + diff --git a/models/mhr/G_100000.pth b/models/mhr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..739040fb3103ef48f9d4629f9c5197d5ad6adbc0 --- /dev/null +++ b/models/mhr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e2cf343ebf039af61593c10306cf5c12677d53a0b71f8967fc03892b3b69af +size 145487597 diff --git a/models/mhr/config.json b/models/mhr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mhr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mhr/vocab.txt b/models/mhr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c0a8c8d00e1dc23a5975be35c8a37c90a961d432 --- /dev/null +++ b/models/mhr/vocab.txt @@ -0,0 +1,42 @@ +ӧ +п +я +б +ш +т +х +1 +щ +а +ж +ы +ӱ +е +4 +р +- +– + +н +в +и +с +з +ч +ҥ +о +м +ъ +г +' +й +ь +ю +л +ф +_ +ц +к +д +э +у diff --git a/models/mhu/G_100000.pth b/models/mhu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9f319338c2e78a0311810017bead6d7d09e21bad --- /dev/null +++ b/models/mhu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c82d7c9e7935c1b4c8f76ddcbc54f093f31be0e45307358c4d28b8f44ad41f7 +size 145481447 diff --git a/models/mhu/config.json b/models/mhu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mhu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mhu/vocab.txt b/models/mhu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7d8414fee7c8d64262f5c50b8ed625fe097ba3c1 --- /dev/null +++ b/models/mhu/vocab.txt @@ -0,0 +1,34 @@ +̃ +- +q +' +k +p +u +h +ũ +a +d +s +r +_ +ü +v +n +c +l +o +g +ẽ +z +e +ǃ +t +ã +j +y +m + +w +b +i diff --git a/models/mhx/G_100000.pth b/models/mhx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ebad7027ff47970036069bce9c0580c63cb7773 --- /dev/null +++ b/models/mhx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2616c3410e47235d4933143bbf2a1eb6b3185bc9ff48a1d47e6db278ffe6fed9 +size 145482219 diff --git a/models/mhx/config.json b/models/mhx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mhx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mhx/vocab.txt b/models/mhx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..15e932763210065d7e45f8abd37b2cb5ade6901e --- /dev/null +++ b/models/mhx/vocab.txt @@ -0,0 +1,35 @@ +o +k +e +1 +g +b +x +q +p +n +- +a +i +0 +' +6 +f + +c +t +w +s +y +v +4 +d +2 +u +r +j +_ +m +z +h +l diff --git a/models/mhy/G_100000.pth b/models/mhy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..94d0d2b895b8eb9affb502a161362d7231144f6b --- /dev/null +++ b/models/mhy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d13c036415efab1730d5719cd7e455694fd511a8bfef73b120b6da488b966a69 +size 145480783 diff --git a/models/mhy/config.json b/models/mhy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mhy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mhy/vocab.txt b/models/mhy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0ed171ff909753253aaacedeffa93c83b6866998 --- /dev/null +++ b/models/mhy/vocab.txt @@ -0,0 +1,33 @@ + +j +r +p +a +y +u +h +1 +l +4 +5 +_ +n +3 +b +t +d +e +m +k +2 +s +w +o +c +z +0 +f +- +i +6 +g diff --git a/models/mib/G_100000.pth b/models/mib/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f5814e2cba18850a375fdfbca44cc6ef9fd0534 --- /dev/null +++ b/models/mib/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52bd4c42055f707fdd5cdb48ea40d227a4cabb5759b9640389c8bab2232a8104 +size 145488363 diff --git a/models/mib/config.json b/models/mib/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mib/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mib/vocab.txt b/models/mib/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..03f67eed7550958ef6475b39b1f9e52c1515cf2a --- /dev/null +++ b/models/mib/vocab.txt @@ -0,0 +1,43 @@ +á +j +ā +p +c +l + +ñ +a +í +‐ +u +x +h +i +s +n +- +g +ó +k +́ +q +e +é +z +ɨ +v +r +ō +t +ē +y +f +ū +b +m +d +o +_ +ī +̄ +ú diff --git a/models/mie/G_100000.pth b/models/mie/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..15ef7bc35d77701f0b8cae3ba6d3cf81f75da0c9 --- /dev/null +++ b/models/mie/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:779baf753e78d4dfda80aa332461207e17a0dcc0ec9bb0f98d595f3d4b1fad34 +size 145485295 diff --git a/models/mie/config.json b/models/mie/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mie/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mie/vocab.txt b/models/mie/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b62025a2ca119faf196a30c4452c64e28126ee2b --- /dev/null +++ b/models/mie/vocab.txt @@ -0,0 +1,39 @@ +b +ē +g +ī +c +á +z +é +ū +_ +- +f +ō +x +h +k +a +y +s +r + +j +o +v +ó +m +t +u +ñ +í +ā +e +n +d +q +p +i +l +ú diff --git a/models/mif/G_100000.pth b/models/mif/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5b5b75a29b52a843652816384a0a754995c626f --- /dev/null +++ b/models/mif/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41174d0e81cfecdbe34c3b6327f80aceabed3a6654f9abec2d09ad1f2b71395d +size 145479039 diff --git a/models/mif/config.json b/models/mif/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mif/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mif/vocab.txt b/models/mif/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8934e87e04bd83be086699e74af388e114fc3c16 --- /dev/null +++ b/models/mif/vocab.txt @@ -0,0 +1,31 @@ +a +| +y +e +m +ə +d +w +r +l +k +g +n +s +h +t +ŋ +b +z +v +u +f +ɗ +c +i +p +j +- +ɓ +' + diff --git a/models/mih/G_100000.pth b/models/mih/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..61e4f204d16725fee95596b01462d49abefbb27a --- /dev/null +++ b/models/mih/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b767698148b4645442f0449ce427bff033fa58a4783eca5c6e2fbc496ce77587 +size 145484625 diff --git a/models/mih/config.json b/models/mih/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mih/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mih/vocab.txt b/models/mih/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..10f0535955ee5ef3916d5e73bccac8d296b42180 --- /dev/null +++ b/models/mih/vocab.txt @@ -0,0 +1,38 @@ +g +q +l +e +r +ꞌ +_ +p +ú +u +s +n +o +j +ó +a +ñ +m +y +á +v +é +x +' +c +ɨ +z +i +t +f +̱ +b +k + +í +́ +d +h diff --git a/models/mil/G_100000.pth b/models/mil/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3abfdcc295b8dfcf1a1f9491c2b7dcb174b01819 --- /dev/null +++ b/models/mil/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0feceac52dc567fd7e067188f939e12c34ff383259087f0130e28288a09499e7 +size 145493757 diff --git a/models/mil/config.json b/models/mil/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mil/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mil/vocab.txt b/models/mil/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e69940901623840d138ddfea7141b3e3324af617 --- /dev/null +++ b/models/mil/vocab.txt @@ -0,0 +1,50 @@ +a +g +ú +á +̀ + +q +c +í +s +o +ǐ +h +ǔ +j +z +ü +e +l +— +_ +t +y +ɨ +ó +n +i +d +è +m +p +v +̌ +b +à +ù +ǎ +k +ñ +‐ +ì +é +r +ǒ +́ +ò +u +ě +x +f diff --git a/models/mim/G_100000.pth b/models/mim/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4b1b9ed9b08775d2e08438be63b406609e523a8 --- /dev/null +++ b/models/mim/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c76f0203f723ee50d891f0f63caa8dcfd68538a9e7df50fd4570f97b70c32346 +size 145483760 diff --git a/models/mim/config.json b/models/mim/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mim/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mim/vocab.txt b/models/mim/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9fcd4a16632c6beddbf83cced8e103085719061b --- /dev/null +++ b/models/mim/vocab.txt @@ -0,0 +1,37 @@ +r +d +o +e +x +v +p +g +é +c +‐ +b + +j +l +ꞌ +_ +h +n +q +k +i +ñ +' +ó +z +a +s +u +m +f +̱ +ú +t +í +á +y diff --git a/models/min/G_100000.pth b/models/min/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..751f379f1a6eca1ff288ed20bbfebd127678e782 --- /dev/null +++ b/models/min/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c4830488bf5552d8d61266a161fd74cbc6e84b1d48eef5d0d866ed466e480fc +size 145482215 diff --git a/models/min/config.json b/models/min/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/min/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/min/vocab.txt b/models/min/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fa7555903e4e058521e3a7be2a7c15fc46197da9 --- /dev/null +++ b/models/min/vocab.txt @@ -0,0 +1,35 @@ +a +| +n +k +i +u +o +t +m +d +s +r +b +l +g +h +- +p +y +e +j +w +c +f +z +' +— +0 +4 +6 +1 +2 +5 +3 + diff --git a/models/mio/G_100000.pth b/models/mio/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d072e3862f8f860a60cb03229f12eb18bf9235be --- /dev/null +++ b/models/mio/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0f7b1f651f394a4de4485ae9f7844c8231b579042d0b70613af5946bf401e4 +size 145482249 diff --git a/models/mio/config.json b/models/mio/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mio/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mio/vocab.txt b/models/mio/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d55c89d259cc9b7ae8f2f896cb2ff0fd6b025552 --- /dev/null +++ b/models/mio/vocab.txt @@ -0,0 +1,35 @@ +| +a +i +c +n +u +h +t +o +d +r +v +ñ +s +e +y +q +m +x +j +l +p +b +g +f +z +ρ +̄ +- +í +ó +k +é +á + diff --git a/models/mip/G_100000.pth b/models/mip/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9324df9f154c84e60c4a31a204475cf272296b30 --- /dev/null +++ b/models/mip/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d4894ef90e8b427094fe7e9372a98c30631c11d03476c3fc326091c475b38a +size 145486084 diff --git a/models/mip/config.json b/models/mip/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mip/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mip/vocab.txt b/models/mip/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..387a79b860a2f9941b2b85739f098ad1cf17c4e2 --- /dev/null +++ b/models/mip/vocab.txt @@ -0,0 +1,40 @@ +s +c +a +y +ü +u +̶ +ú +̱ +x +m +t +d +e +p +5 +q +ñ +r +g +ó +v +í +- +k +— +h +i +j +é +á +n +_ +o +f +b +ꞌ + +z +l diff --git a/models/miq/G_100000.pth b/models/miq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8aa6dbbe96ecbf6bbe801117ae5a8bdcdd09512 --- /dev/null +++ b/models/miq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3fb79c90d554963e6812694eecb1c6510497c58e2aeef48bd48007156991054 +size 145486843 diff --git a/models/miq/config.json b/models/miq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/miq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/miq/vocab.txt b/models/miq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86a134241ccc98f5327b1c816288957ddbdcf08b --- /dev/null +++ b/models/miq/vocab.txt @@ -0,0 +1,41 @@ +a +| +i +n +k +b +r +s +u +l +m +t +w +p +d +h +y +g +â +e +j +- +î +o +' +0 +1 +7 +2 +4 +5 +3 +6 +û +9 +8 +ê +f +v +ô + diff --git a/models/mit/G_100000.pth b/models/mit/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3163c42c4fd4d4f5626c39a806acebd9d211572 --- /dev/null +++ b/models/mit/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b20b933e4b903691f1929d90da2b51810e07497cc1745c737b1bd1cba57794e +size 145494509 diff --git a/models/mit/config.json b/models/mit/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mit/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mit/vocab.txt b/models/mit/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..652d92b03f9e2f23b7986c44297f36ff19247ba6 --- /dev/null +++ b/models/mit/vocab.txt @@ -0,0 +1,51 @@ +| +n +i +a +c +à +u +d +h +‐ +ì +s +á +t +e +ù +í +o +v +m +x +è +y +ñ +ú +r +é +q +ó +l +g +p +ò +j +b +â +î +f +z +ˋ +ê +ˊ +û +ô +ü +9 +k +3 +4 +0 + diff --git a/models/miy/G_100000.pth b/models/miy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..198decc6d03e8e33b1ee4b8aff083671d85ce25d --- /dev/null +++ b/models/miy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cb02245acc0d24a9fd3c7aebba9944b5fcdd01e30d05757db0d14d5bf7fd91 +size 145486933 diff --git a/models/miy/config.json b/models/miy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/miy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/miy/vocab.txt b/models/miy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7628453aac551e1c0ac4730d96ea0bd9a70c6499 --- /dev/null +++ b/models/miy/vocab.txt @@ -0,0 +1,41 @@ +_ +n +̱ +m +ë +y +ú +g +c +h +ó +ñ +ö +j +ü +á +p +o +z +k +e +t +ꞌ +l +u +f +i +ï +r +v +q +d +­ +s +é + +x +ä +a +í +b diff --git a/models/miz/G_100000.pth b/models/miz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..36837dcc411910dee14423ad9009ec58c269e7f8 --- /dev/null +++ b/models/miz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f3528ae1f8dbd9b8c9901fa251d4d19c2a1aa2a4bf18f585b7f9b09c7a69e3 +size 145493731 diff --git a/models/miz/config.json b/models/miz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/miz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/miz/vocab.txt b/models/miz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5a1342bad718efb38110e17487990f80903bd7dc --- /dev/null +++ b/models/miz/vocab.txt @@ -0,0 +1,50 @@ +â +o +ē +y +e +u +d +g +ú +c +— +û +̂ +í +f +ō +s +ñ +i +v +k +̄ +ó +ā +́ +x +ī +p +j +b +n +l +ꞌ +q +t +r +h +ô +á + +ê +î +m +é +_ +a +ū +z +ɨ +ʼ diff --git a/models/mjl/G_100000.pth b/models/mjl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6697e1384f2cf3ee9b94f9b6b73fad40f8f7e80 --- /dev/null +++ b/models/mjl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:756690e05a53b5dcc7f3a42af3be76325f6d984d1f827d44981ac72da47bdd43 +size 145503740 diff --git a/models/mjl/config.json b/models/mjl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mjl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mjl/vocab.txt b/models/mjl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cdc61f7ede663299fb486170efb759a4e138a746 --- /dev/null +++ b/models/mjl/vocab.txt @@ -0,0 +1,63 @@ +| +ा +े +र +् +स +ह +क +ी +ि +ो +त +न +ं +म +ल +ज +य +प +ु +द +ब +ण +आ +ग +ू +व +ई +च +ख +भ +ए +ड +झ +़ +थ +ै +ठ +ध +ऊ +अ +ँ +ऐ +इ +फ +उ +ट +छ +ौ +- +ढ +घ +ः +ओ +ञ +' +ष +ृ +औ +2 +0 +7 + diff --git a/models/mjv/G_100000.pth b/models/mjv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..19c5ba800cf028182608b2fe43e5ee43bde2d3c3 --- /dev/null +++ b/models/mjv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d78a11f96b8561fec3ac3773c97b6d5a7ace86a043e81f1bc135ba67573a8a +size 145496933 diff --git a/models/mjv/config.json b/models/mjv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mjv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mjv/vocab.txt b/models/mjv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b115ac9a665864dc1f1cba8b188fc3d7d03633a2 --- /dev/null +++ b/models/mjv/vocab.txt @@ -0,0 +1,54 @@ +ള +ത +ി +ന +ൽ +ല +പ +േ + +ഞ +ം +എ +ു +ച +ൺ +ൂ +ർ +ആ +ണ +ോ +ഒ +6 +ീ +അ +' +ട +ഇ +ഈ +ങ +ജ +ഓ +3 +ഏ +ൾ +ാ +0 +െ +- +യ +റ +‍ +ശ +ഉ +് +4 +ഊ +വ +ക +ൊ +ൻ +ബ +ര +_ +മ diff --git a/models/mkl/G_100000.pth b/models/mkl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2c4f2fb630c94a80a51026467b1fc8b802b0721d --- /dev/null +++ b/models/mkl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a996bcb445f0e7a34f9ebde55da4c82b3f829d3f40251a7fbc2b4468cfd01212 +size 145492060 diff --git a/models/mkl/config.json b/models/mkl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mkl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mkl/vocab.txt b/models/mkl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..22add8bd11bed7958230a8031933d8eec0d180ad --- /dev/null +++ b/models/mkl/vocab.txt @@ -0,0 +1,48 @@ +| +ɑ +i +ɛ +n +ɔ +u +k +e +m +b +ŋ +s +o +d +w +y +l +̃ +í +j +̀ +t +g +f +c +r +p +h +́ +a +ù +z +ǹ +ì +ĩ +ò +ũ +à +v +ú +á +ń +é +ó +1 +è + diff --git a/models/mkn/G_100000.pth b/models/mkn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..be2180a420d800a49fd34d9d77b172d56267f32e --- /dev/null +++ b/models/mkn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed3b930ed6fd49f760489c7c7405d6d377aaa250d8a610980faebb15f553c71 +size 145479161 diff --git a/models/mkn/config.json b/models/mkn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mkn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mkn/vocab.txt b/models/mkn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a5d301a9d75bfc3b9c6ca58c8588403d3c28aa7b --- /dev/null +++ b/models/mkn/vocab.txt @@ -0,0 +1,31 @@ +g +m + +t +c +k +e +f +y +w +_ +l +n +h +i +u +í +b +á +o +é +a +- +r +d +p +j +' +s +ó +ú diff --git a/models/mlg/G_100000.pth b/models/mlg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a9f358fc879738b75cb79359e0fb57a330c8edfc --- /dev/null +++ b/models/mlg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11b9b97aa6ffa938569c2e46a5f8ef788c47fef99d5b61241eda2050d68275c +size 145478273 diff --git a/models/mlg/config.json b/models/mlg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/mlg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mlg/vocab.txt b/models/mlg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..740ac2c9b2ee51406e390981f4fdf999a89c6635 --- /dev/null +++ b/models/mlg/vocab.txt @@ -0,0 +1,30 @@ +a +| +n +i +y +o +r +t +m +e +h +s +k +f +z +d +l +' +v +p +b +j +- +g +à +ỳ +ô +ò +ì + diff --git a/models/mmg/G_100000.pth b/models/mmg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc2bec183ef465821d0365bc697a8fe4945cba36 --- /dev/null +++ b/models/mmg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d70095ea116dc7f505db049233771153123454eaa7a14ba0ab5094571888e22 +size 145481323 diff --git a/models/mmg/config.json b/models/mmg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mmg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mmg/vocab.txt b/models/mmg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b32754be1c50f5bb4a8a1256e63e6a3ded138824 --- /dev/null +++ b/models/mmg/vocab.txt @@ -0,0 +1,34 @@ +b +9 +6 +m +n +r +o +u +i +f +3 +s +8 +l +y +g +0 +1 +v +k +7 +w +e +- +_ +ō +j +a +2 + +h +4 +t +5 diff --git a/models/mnb/G_100000.pth b/models/mnb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d069ecec67c4fa6235b3de0acc1d3c0e2cc4933e --- /dev/null +++ b/models/mnb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e236d711d9c6fcb6641b4d87087d38941efb2d8b879883f25ab2e91766fcff4 +size 145474520 diff --git a/models/mnb/config.json b/models/mnb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mnb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mnb/vocab.txt b/models/mnb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24bf82e9d462b064fef749a44dd2c1be9d421232 --- /dev/null +++ b/models/mnb/vocab.txt @@ -0,0 +1,25 @@ +a +| +o +n +i +e +m +u +h +k +t +d +s +' +l +b +p +w +g +f +r +- +y +z + diff --git a/models/mnf/G_100000.pth b/models/mnf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4944209e9e28bebf6a067d1fc786a0edbc9b22e --- /dev/null +++ b/models/mnf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e57d02b298dccb1c6a2edcadee902e608df7f2a98286044ad42f28f732359a +size 145495303 diff --git a/models/mnf/config.json b/models/mnf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mnf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mnf/vocab.txt b/models/mnf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8cc1aa9bfba973e4d7bd97ec86e80196e06b7d35 --- /dev/null +++ b/models/mnf/vocab.txt @@ -0,0 +1,52 @@ +| +a +b +n +à +t +e +m +ɨ +ɔ +è +̧ +l +̀ +k +u +w +s +ˈ +g +i +ò +y +h +ŋ +d +ə +ê +f +o +z +ì +ù +̂ +p +ǎ +ȩ +j +û +v +â +̌ +ě +ǔ +ǒ +c +ǐ +î +ô +' +- + diff --git a/models/mnk/G_100000.pth b/models/mnk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a409ac5fbc2fbf089339d5bc114351393af6035a --- /dev/null +++ b/models/mnk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a51879892103bf27b0e33d1ff27f04e3a1cc77bbb79fdf81cd18b6859dc72c4d +size 145479131 diff --git a/models/mnk/config.json b/models/mnk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mnk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mnk/vocab.txt b/models/mnk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..420d1a4dfcc46eabdffd361e8bacc72c5d805ebe --- /dev/null +++ b/models/mnk/vocab.txt @@ -0,0 +1,31 @@ +| +a +o +i +e +l +u +n +k +t +ŋ +m +b +y +s +r +d +w +f +ñ +j +ì +h +- +p +ǹ +̀ +à +c +' + diff --git a/models/mnw/G_100000.pth b/models/mnw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4f42879ea6b85aa0d115f5e2354ee364abfab65f --- /dev/null +++ b/models/mnw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ad545a4094256af4b72c702c63f99b0fa179fa4b7eeb1a4f73c1aa73fc5076 +size 145501649 diff --git a/models/mnw/config.json b/models/mnw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mnw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mnw/vocab.txt b/models/mnw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b7f26c480656d6693a53b00890a03cac8a1a1d31 --- /dev/null +++ b/models/mnw/vocab.txt @@ -0,0 +1,60 @@ +ၝ +ါ +၆ +ဥ +ဗ +ဓ +ဍ +ၠ +ၚ +မ +ပ +ရ +ဴ +ၟ +လ +သ +ွ +ယ +ဨ +ဘ +အ +တ +ည + +ခ +ထ +ဣ +ဳ +ူ +က +ိ +ေ +ဒ +ီ +ျ +ဵ +ၜ +ဩ +_ +ဂ +ဇ +ဲ +ှ +္ +ဟ +် +ြ +စ +ံ +န +ဝ +ု +ဏ +ဖ +ဃ +ၞ +း +ဿ +ာ +ဆ diff --git a/models/mnx/G_100000.pth b/models/mnx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..92c136bef7ded9b981ebabb69676bb5cc1935b22 --- /dev/null +++ b/models/mnx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21346ab726fb27854995ed87dd4c63cd6e667574fc42096f2779ec3748cf09d9 +size 145473873 diff --git a/models/mnx/config.json b/models/mnx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mnx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mnx/vocab.txt b/models/mnx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b6de82747d05fe3d273b3d0f5583f3f74d2234c4 --- /dev/null +++ b/models/mnx/vocab.txt @@ -0,0 +1,24 @@ +j +u +c + +s +w +k +g +y +f +t +p +m +o +e +i +h +d +a +n +r +b +l +_ diff --git a/models/moa/G_100000.pth b/models/moa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c7ba0fbbbdeb6f2bec26ca78590965fc762c96b3 --- /dev/null +++ b/models/moa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a43a0a418a0de42d3db2dde238f7c37de9f595378da84b6779ea0765941b6a1 +size 145479935 diff --git a/models/moa/config.json b/models/moa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/moa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/moa/vocab.txt b/models/moa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e70e37ddfe30578dbb7fc851d4fda4ccd7f4f5c0 --- /dev/null +++ b/models/moa/vocab.txt @@ -0,0 +1,32 @@ +d +ɛ +a +_ +v +u +o +q +c +w +z +h +n +g +' +ɔ +- +i +t +b +f +k +s +e +m +ŋ + +j +y +p +r +l diff --git a/models/mog/G_100000.pth b/models/mog/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..da27abb42cd9b71c092bbc4a9af48f0d02cd33eb --- /dev/null +++ b/models/mog/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b8fb8dac5a1ca5bccd2fbe4edf29c624d4313cc53cb50349e27ae3fc13ef4f1 +size 145483775 diff --git a/models/mog/config.json b/models/mog/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mog/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mog/vocab.txt b/models/mog/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f27311ef9c2ef617a92ddac6391668ece606438b --- /dev/null +++ b/models/mog/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +o +i +t +m +u +k +g +b +d +s +' +p +y +r +e +l +á +h +í +ḷ +w +- +ḻ +ú +j +ó +c +é +z +0 +4 +f +1 + diff --git a/models/mon/G_100000.pth b/models/mon/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a6a94f6f623c84f9df118df403cf4688abc438de --- /dev/null +++ b/models/mon/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90f6142050fad2689eb7b3a2f520c3a191fddbdb05c1c8f16aba7990baa0484e +size 145504368 diff --git a/models/mon/config.json b/models/mon/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mon/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mon/vocab.txt b/models/mon/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..59428b93016f7acb0560edd30fb009784115c514 --- /dev/null +++ b/models/mon/vocab.txt @@ -0,0 +1,64 @@ +| +а +э +н +г +р +д +и +х +ү +л +т +й +о +у +с +б +ѳ +м +ч +в +ж +ь +з +ы +е +щ +ц +я +ю +— +ё +п +к +ф +ъ +j +l +e +b +p +i +m +f +n +x +g +r +q +v +s +c +d +w +k +u +o +y +z +a +t +h +- + diff --git a/models/mop/G_100000.pth b/models/mop/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c33f8e06cb0616779ff0c7b516ff71530768a3ce --- /dev/null +++ b/models/mop/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58664c94fdf6b52efe6cec7f86c2e5c35cb2fb166b7b1c285f2e09bef601454c +size 145489139 diff --git a/models/mop/config.json b/models/mop/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mop/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mop/vocab.txt b/models/mop/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..05a96354c3352a8e92d7fdfe387897695a753e48 --- /dev/null +++ b/models/mop/vocab.txt @@ -0,0 +1,44 @@ +| +a +' +i +u +e +c +t +o +l +n +j +b +s +m +y +h +x +q +p +w +d +ü +z +r +— +g +- +f +v +é +0 +у +2 +1 +k +ñ +3 +5 +6 +4 +9 +7 + diff --git a/models/mor/G_100000.pth b/models/mor/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..59a84e76eab96dac154427bfdb7dfb47729629cb --- /dev/null +++ b/models/mor/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c151e3e1cdab006824b0bb0c4d4822dc1ae8eb196e637113d380b1b7a97437 +size 145481553 diff --git a/models/mor/config.json b/models/mor/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mor/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mor/vocab.txt b/models/mor/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ba04fab90acc40248a379be0d330f0949cc92a25 --- /dev/null +++ b/models/mor/vocab.txt @@ -0,0 +1,34 @@ +| +a +i +n +ǝ +e +ŋ +l +r +đ +g +ṯ +o +u +m +ɽ +w +ñ +d +ë +b +s +y +c +t +f +k +p +j +ḏ +- +v +' + diff --git a/models/mos/G_100000.pth b/models/mos/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ee684a53b7d49a547cf4f8eea02f7cf1bde8214 --- /dev/null +++ b/models/mos/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1c98a5d138e70679b690c5b15746571216468bfe3ed023a80c5a83169bfce17 +size 145482195 diff --git a/models/mos/config.json b/models/mos/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mos/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mos/vocab.txt b/models/mos/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..59b4e0f611c510c1671a24ce5aad1b441150335d --- /dev/null +++ b/models/mos/vocab.txt @@ -0,0 +1,35 @@ +| +a +n +e +b +m +g +s +o +y +d +l +t +ẽ +i +k +ã +r +ɩ +- +ʋ +w +z +p +u +õ +f +ɛ +ũ +ĩ +v +' +– +6 + diff --git a/models/mox/G_100000.pth b/models/mox/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6bfdbd3eeeeeb37063b246c8eb220a74872bf818 --- /dev/null +++ b/models/mox/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7bfc0c9f86ba291bacaf340e127d8942c6fd4eee3c7d19f7fe9d819d434b125 +size 145482983 diff --git a/models/mox/config.json b/models/mox/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mox/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mox/vocab.txt b/models/mox/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ed9971a04106572521281a2b786098211fbd5dd1 --- /dev/null +++ b/models/mox/vocab.txt @@ -0,0 +1,36 @@ +h +2 +7 +_ +i +8 +3 +v +w +1 +0 +5 +q +t +o +- +l +y +u +a + +6 +e +k +m +s +9 +f +d +4 +ʼ +n +b +' +— +g diff --git a/models/moz/G_100000.pth b/models/moz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9bbb4b10cf3695d3580274c3c259304c12940db --- /dev/null +++ b/models/moz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfd1d9dadf82274553239259d1f5a80b06f2503cd9d76cdeec0f40ea253d2fe +size 145486091 diff --git a/models/moz/config.json b/models/moz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/moz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/moz/vocab.txt b/models/moz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f68f17e9a6eb7da03ae3026b562abf13bb37c438 --- /dev/null +++ b/models/moz/vocab.txt @@ -0,0 +1,40 @@ +ó +ɗ +- +u +̰ +ɓ +_ +b +w +j +h +ƴ +— + +â +û +l +a +n +ú +z +á +î +ŋ +s +t +m +r +ô +d +p +c +e +ʼ +k +í +g +y +i +o diff --git a/models/mpg/G_100000.pth b/models/mpg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3c14931336f8edb5136b12ccc0910776efd0e06 --- /dev/null +++ b/models/mpg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fcb43eb2bbb54c73c84bc6c1e36f6751b4cdf86f2194497ab94b8fb0337b350 +size 145487717 diff --git a/models/mpg/config.json b/models/mpg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mpg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mpg/vocab.txt b/models/mpg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ee9faacb3f551fec992475bfe44c5f62c3a17f39 --- /dev/null +++ b/models/mpg/vocab.txt @@ -0,0 +1,42 @@ +| +a +i +m +n +u +d +g +e +l +t +k +' +o +s +h +b +r +z +y +w +v +j +c +p +â +á +f +î +- +ä +ê +ï +û +ô +à +ì +ü +q +ö +x + diff --git a/models/mpm/G_100000.pth b/models/mpm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9186c906317228d4b13a15627b8e1e6ba011017 --- /dev/null +++ b/models/mpm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1743812c108fbc7506f7677f348beba087a48038c7323404c0bab6a57c8cf0b1 +size 145486079 diff --git a/models/mpm/config.json b/models/mpm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mpm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mpm/vocab.txt b/models/mpm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..746ce58121318c784874159f404afebe989bc2f3 --- /dev/null +++ b/models/mpm/vocab.txt @@ -0,0 +1,40 @@ +ɨ +i +_ +ū +p +' +í +a +d +ā +g +h +x +s +m +e +y +ī +b +ñ +é +n +̄ +v +z +l +q +c +k +j +t +u +ú +́ +r +o +f + +ó +á diff --git a/models/mpp/G_100000.pth b/models/mpp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..675719531c4d11ca6668da6221e3a4948908ea78 --- /dev/null +++ b/models/mpp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:013520856e7a0cdf25d896e9f99f81dda2bdba5ba5adb8c1c62ca2298c82d084 +size 145483765 diff --git a/models/mpp/config.json b/models/mpp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mpp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mpp/vocab.txt b/models/mpp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..44f7f608ee3abf06b38ec36246007265aec643d1 --- /dev/null +++ b/models/mpp/vocab.txt @@ -0,0 +1,37 @@ +_ +o +g +3 +l +i +4 +c +t +d +p +9 +u +­ +z +7 +n +b +a +2 + +5 +f +w +6 +' +k +e +h +1 +m +- +0 +r +s +8 +y diff --git a/models/mpx/G_100000.pth b/models/mpx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..428cbcc1aa1c6baa283b71641f9d36728029ca79 --- /dev/null +++ b/models/mpx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b364a9ddbdf077bb59a14ba73f876b2a51198104aff15e650471c22b26a009 +size 145479031 diff --git a/models/mpx/config.json b/models/mpx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mpx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mpx/vocab.txt b/models/mpx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bc98f8cac4e549800127a26f21bb9fc315c04416 --- /dev/null +++ b/models/mpx/vocab.txt @@ -0,0 +1,31 @@ +a +| +i +n +e +l +o +u +g +w +t +b +y +k +h +m +s +p +v +d +- +' +— +0 +1 +2 +6 +4 +3 +5 + diff --git a/models/mqb/G_100000.pth b/models/mqb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12402319911518bb521a50af0fb1f710fbac91a9 --- /dev/null +++ b/models/mqb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5837d80d66d6c69f1762b52e4748cbcb136a52eefc6d383c6157d4a8a34bc7c6 +size 145489113 diff --git a/models/mqb/config.json b/models/mqb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mqb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mqb/vocab.txt b/models/mqb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bd9a5047487b5d0724babc4fc87f7881482ad936 --- /dev/null +++ b/models/mqb/vocab.txt @@ -0,0 +1,44 @@ +| +a +n +ə +y +k +e +m +t +w +s +i +o +l +u +h +r +ɗ +b +à +g +d +p +z +c +j +v +f +ʼ +ɓ +â +̀ +̂ +ù +ì +œ +á +é +î +- +û +ò +2 + diff --git a/models/mqf/G_100000.pth b/models/mqf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2994f3addea9eb278aadbed878182920192461d --- /dev/null +++ b/models/mqf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1aeeaa62a53fc67ba1ed70437f71f282f4a06922c75f8160df869f001861fd +size 145489895 diff --git a/models/mqf/config.json b/models/mqf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mqf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mqf/vocab.txt b/models/mqf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..578f51d7d5db7fc284f0f9da3ae62db51686407a --- /dev/null +++ b/models/mqf/vocab.txt @@ -0,0 +1,45 @@ +| +e +o +a +n +t +r +b +m +k +i +y +s +u +w +á +- +ò +ó +à +í +é +ù +è +ì +ú +0 +4 +1 +2 +l +5 +p +3 +6 +9 +7 +h +8 +d +j +c +' +g + diff --git a/models/mqj/G_100000.pth b/models/mqj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..730da21160d07f056425c80cc25e6c54d76ea96b --- /dev/null +++ b/models/mqj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef68097dbb9bf0661f1e33c52f2031a6c4bb04ce4970d474f88f7219a688bda +size 145483765 diff --git a/models/mqj/config.json b/models/mqj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mqj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mqj/vocab.txt b/models/mqj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b34cbbfa2f76edac1684e0eef5ffa3586907251c --- /dev/null +++ b/models/mqj/vocab.txt @@ -0,0 +1,37 @@ +j +- +d +z +8 +p + +l +2 +5 +h +m +n +s +1 +6 +e +y +7 +4 +3 +k +f +t +r +i +' +a +_ +b +w +u +o +9 +c +0 +g diff --git a/models/mqn/G_100000.pth b/models/mqn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4aadf30729fce1867791608ab682b828ba81665a --- /dev/null +++ b/models/mqn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b60314d3662ec6d7e860f2d40fdfce3873ac688796292a41e1937387fd4af7d1 +size 145479141 diff --git a/models/mqn/config.json b/models/mqn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mqn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mqn/vocab.txt b/models/mqn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc79b1b27f8ec3ff8cdb56da171761f68d7c7c29 --- /dev/null +++ b/models/mqn/vocab.txt @@ -0,0 +1,31 @@ +1 +3 +u +k +t +s +a +o +d +b +p +h +9 +g +c +j +r + +m +e +_ +z +l +f +i +- +n +y +' +4 +w diff --git a/models/mrw/G_100000.pth b/models/mrw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8b7b7ac45bda697e9c52b572598220e54cb88db --- /dev/null +++ b/models/mrw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221c9f831f5c8ca22a6f384354e34364da5c35ac8fea15086b2a38504151fbb3 +size 145472245 diff --git a/models/mrw/config.json b/models/mrw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mrw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mrw/vocab.txt b/models/mrw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e59c4021abca9b1769841f24b131be6528421c29 --- /dev/null +++ b/models/mrw/vocab.txt @@ -0,0 +1,22 @@ +a +| +n +o +i +k +s +g +e +m +y +t +p +r +l +d +b +w +h +' +- + diff --git a/models/msy/G_100000.pth b/models/msy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..86ec48af33aef763ce72cd23e9d0989cdba7e654 --- /dev/null +++ b/models/msy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc36662ce1ea3fae782b7c7d978e71fb205b5d4c13ed495284d90e2e9271a1aa +size 145484537 diff --git a/models/msy/config.json b/models/msy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/msy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/msy/vocab.txt b/models/msy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..67432a1210a438b7b2a03c8d224f23985ed8c280 --- /dev/null +++ b/models/msy/vocab.txt @@ -0,0 +1,38 @@ +a +| +i +g +m +ɨ +r +n +e +h +z +u +k +b +o +t +v +d +s +p +f +l +j +0 +w +1 +2 +7 +4 +' +3 +y +5 +6 +9 +8 +- + diff --git a/models/mtd/G_100000.pth b/models/mtd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..46890a387fd61c81e308f7d55d132a09c85504db --- /dev/null +++ b/models/mtd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33dbef3d050b2a3a8568438ffca68d66607a007c6ca9715dfafa8a2283b1978b +size 145474545 diff --git a/models/mtd/config.json b/models/mtd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mtd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mtd/vocab.txt b/models/mtd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bc01ec9c31893d8fa9348daeab9bdad7c295f2d6 --- /dev/null +++ b/models/mtd/vocab.txt @@ -0,0 +1,25 @@ +m +a +b +_ +- +t +r +y +c +i +o +s +g +p +h +ꞌ +d +l +j +e + +w +n +k +u diff --git a/models/mtj/G_100000.pth b/models/mtj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..845fc29f23a1b914db2c657e49842c09110e71a7 --- /dev/null +++ b/models/mtj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253efab8957740f5039816c9486b5158debdd3994b6d3f2b62b5f55fc7319d94 +size 145479915 diff --git a/models/mtj/config.json b/models/mtj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mtj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mtj/vocab.txt b/models/mtj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..caefe28c5087556a55982cabdbeda533ba799ffc --- /dev/null +++ b/models/mtj/vocab.txt @@ -0,0 +1,32 @@ +o +e +y +j +k +h +t +r +0 +2 +f +n +s +i +' +z +g +u +d +4 +- +1 +c +a +b +l +m + +_ +w +6 +p diff --git a/models/mto/G_100000.pth b/models/mto/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..976d30be13837d697070ea916edc1018aecfcbd2 --- /dev/null +++ b/models/mto/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6136f1b77bed2ecf005ff278cb33c2c36fda442525d3741e05744da996988d28 +size 145486071 diff --git a/models/mto/config.json b/models/mto/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mto/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mto/vocab.txt b/models/mto/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3bd4c316aa4d4619dc0c34072691894aed7398d2 --- /dev/null +++ b/models/mto/vocab.txt @@ -0,0 +1,40 @@ +l +r +z +4 +e +i +q +h +ú +í +y +v +c +ó +_ +9 +0 +w +ñ +' +t +n + +s +o +u +k +p +d +̱ +f +a +x +é +b +j +m +1 +g +á diff --git a/models/muh/G_100000.pth b/models/muh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6b1ddda8a7424c4d795a80a9f0b0a8e4eea9806 --- /dev/null +++ b/models/muh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdb013f4491f89e320a43ae68c73f46ec676b8adce0031035ef498f58ee82f62 +size 145479938 diff --git a/models/muh/config.json b/models/muh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/muh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/muh/vocab.txt b/models/muh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d9651d4c3a68b025b55336a49b521b9805f87dde --- /dev/null +++ b/models/muh/vocab.txt @@ -0,0 +1,32 @@ +| +a +e +ü +n +k +g +i +r +t +m +b +d +w +l +y +u +f +p +o +ï +s +h +- +z +ö +ʼ +c +j +' +v + diff --git a/models/mup/G_100000.pth b/models/mup/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..08de900cb84c9858199746a8fedbf736ee8ea45d --- /dev/null +++ b/models/mup/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5162189e4b7456d196ac2cb37169ba43d8e5f312b4d303719b875459dee49df +size 145499121 diff --git a/models/mup/config.json b/models/mup/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mup/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mup/vocab.txt b/models/mup/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e12bef0c552156dc35c13c5b40e53ed89ec25416 --- /dev/null +++ b/models/mup/vocab.txt @@ -0,0 +1,57 @@ + +अ +ऊ +ढ +ब +द +- +ई +ट +ा +उ +ज +ओ +फ +झ +इ +ठ +ञ +ो +ु +़ +ः +ऐ +स +म +ण +ं +ग +ध +_ +प +र +थ +े +ळ +व +ष +भ +् +य +त +' +छ +ह +‍ +क +ू +न +घ +ल +च +ए +आ +ी +ि +ख +ड diff --git a/models/mur/G_100000.pth b/models/mur/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b774bfc925e0fc3a33bc7469457b6312d9731d7 --- /dev/null +++ b/models/mur/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635a0267a5996b4509083146eaef448a2713507b419eb628e7d15e56986fedbb +size 145482999 diff --git a/models/mur/config.json b/models/mur/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mur/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mur/vocab.txt b/models/mur/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1f3e11e1167d2d1edd388297a24b059df570d66c --- /dev/null +++ b/models/mur/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +n +o +e +k +ɔ +t +l +u +ɛ +g +c +r +z +y +m +ŋ +b +d +w +j +s +v +p +ã +á +ḏ +ṯ +0 +1 +2 +5 +4 + diff --git a/models/muv/G_100000.pth b/models/muv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c6144cdb85161025e972132e76758a5152eb1a4 --- /dev/null +++ b/models/muv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b00fcf3d144199eddd9f39c191229dd4d53d6fb835ae1218353617deb040688f +size 145491429 diff --git a/models/muv/config.json b/models/muv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/muv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/muv/vocab.txt b/models/muv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..28ba02f77f186fe8afe97e8a065e6cc307432cb5 --- /dev/null +++ b/models/muv/vocab.txt @@ -0,0 +1,47 @@ +ഒ +് +അ +ട +ണ +ര +ൂ + +ള +ോ +ഏ +ല +ച +ീ +റ +ആ +4 +എ +െ +ം +വ +ഇ +ക +ഊ +യ +3 +പ +' +ർ +6 +ഞ +ഉ +ൊ +ങ +ഓ +േ +ന +0 +ു +ശ +ഈ +ാ +ി +മ +ത +_ +ൻ diff --git a/models/muy/G_100000.pth b/models/muy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d182c4cad6cd39ef096d0090da07ac9c6f0dfba8 --- /dev/null +++ b/models/muy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cae07bc20b1d59447399e4277b817901014b5aac3dc358440af09604b879d7e +size 145492235 diff --git a/models/muy/config.json b/models/muy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/muy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/muy/vocab.txt b/models/muy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..63a9af8de00ad573058284713b61280833ecc635 --- /dev/null +++ b/models/muy/vocab.txt @@ -0,0 +1,48 @@ +f +è +ə +d +̂ +ì +k +e +é +l +o +à +ù +̀ +_ +j + +z +t +ɓ +î +- +ê +g +ô +́ +a +h +y +í +s +ŋ +ò +á +c +ɗ +i +m +r +ʉ +w +b +u +â +p +n +ú +v diff --git a/models/mvp/G_100000.pth b/models/mvp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed153b7766ca5f19a06e17933996b5ef45ac853b --- /dev/null +++ b/models/mvp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d391ee944a7f6d7217104a00c5077d387395d98e1bee723cba804a3403bab356 +size 145476837 diff --git a/models/mvp/config.json b/models/mvp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mvp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mvp/vocab.txt b/models/mvp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..de90a2d717686b944f0cf4183fbb7b27cc9bb607 --- /dev/null +++ b/models/mvp/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +i +o +t +u +m +l +k +g +s +e +p +' +r +d +b +j +h +- +w +c +y +f +z +q + diff --git a/models/mwq/G_100000.pth b/models/mwq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4e8ed14df132fc41a9f779e5efe45f83d08a336c --- /dev/null +++ b/models/mwq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ce268fabd94650da5ca6561d4824b7ec8f11d8ad126a8f3ba05860e7eac6ee +size 145489121 diff --git a/models/mwq/config.json b/models/mwq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mwq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mwq/vocab.txt b/models/mwq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..070b4fecc4b4fd8adc432bf930ea33bfc5c34045 --- /dev/null +++ b/models/mwq/vocab.txt @@ -0,0 +1,44 @@ +| +a +i +n +h +k +u +g +c +m +e +t +w +p +ä +s +l +' +ü +b +o +v +y +d +z +r +j +- +f +x +è +é +ë +á +0 +6 +ô +4 +î +ç +1 +õ +q + diff --git a/models/mwv/G_100000.pth b/models/mwv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2d03fdfb7378015ffe04cdf7abbc0bd4a9b98ca4 --- /dev/null +++ b/models/mwv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb7aea558c20fd3d0d71db6d83a854b86fa37ed89e0c4d116ff0de8b5d784bac +size 145479139 diff --git a/models/mwv/config.json b/models/mwv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mwv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mwv/vocab.txt b/models/mwv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6c647834d1da41c72efe178541c3a6f515cd4233 --- /dev/null +++ b/models/mwv/vocab.txt @@ -0,0 +1,31 @@ +a +| +i +u +n +e +k +t +s +m +g +l +b +p +r +o +d +y +j +h +c +' +- +f +v +w +z +0 +4 +1 + diff --git a/models/mxb/G_100000.pth b/models/mxb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..52a1af15611a12d5f42624ac63ac095d5c662a8c --- /dev/null +++ b/models/mxb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0370fe171899fb3c6bef91f2c9cbcf794ab52715f913f0276ad3c0a898637cb8 +size 145491335 diff --git a/models/mxb/config.json b/models/mxb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mxb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mxb/vocab.txt b/models/mxb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8a548d8ea4cbe751d7195fcdbc6db9d7bb9a8984 --- /dev/null +++ b/models/mxb/vocab.txt @@ -0,0 +1,47 @@ +ñ +2 +r +e +k +̱ +ú +y +8 +é +0 +h +5 +s +' +_ +- +t +4 + +á +d +1 +q +i +6 +p +z +3 +í +b +l +9 +v +j +o +g +c +7 +u +a +f +x +m +ü +ó +n diff --git a/models/mxq/G_100000.pth b/models/mxq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bada068c0c733291390d797c65cf5e5a5bd5ed7 --- /dev/null +++ b/models/mxq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e15fde31a6a6a9cbfdff5a358d69498edf57e812f266944f765a4c29a9ade084 +size 145482891 diff --git a/models/mxq/config.json b/models/mxq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mxq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mxq/vocab.txt b/models/mxq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7089c0230198a83dfa07e07259f7d1fe538f28c4 --- /dev/null +++ b/models/mxq/vocab.txt @@ -0,0 +1,36 @@ +s +x +á +p +b +y +ó +w +i +e +́ +_ +ʉ +é +f + +̱ +— +u +z +m +r +o +d +t +k +a +í +l +' +n +ú +j +ñ +v +g diff --git a/models/mxt/G_100000.pth b/models/mxt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a011919a1baa07c5a8b3e5d18a4d795b72b2bb10 --- /dev/null +++ b/models/mxt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8182d34e59ab7165edb7ef6349d55f3b6718212f86b564ad99dee188ac0d5802 +size 145483735 diff --git a/models/mxt/config.json b/models/mxt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mxt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mxt/vocab.txt b/models/mxt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ebef750268b48c019937b89f7dfe52d463dcb209 --- /dev/null +++ b/models/mxt/vocab.txt @@ -0,0 +1,37 @@ +i +q +ó +h +̱ +r +k +j +é +f +c +s +í +x +ṉ +z +ɨ +l +v +b +n +p +a + +u +ñ +o +m +́ +e +t +ú +d +_ +g +á +y diff --git a/models/mxv/G_100000.pth b/models/mxv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..69fd84b7f2a7b841bbf64a4b7849b8b8edbb7bc2 --- /dev/null +++ b/models/mxv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d448e3fac4a5f41979162d2d604497a260a5a4f40381dcbfa2e6a34970169df6 +size 145491463 diff --git a/models/mxv/config.json b/models/mxv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mxv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mxv/vocab.txt b/models/mxv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ae29440ff935b04813921644f03bdf9d494a22f1 --- /dev/null +++ b/models/mxv/vocab.txt @@ -0,0 +1,47 @@ +h +e +ò +o +v +l +u +j +s +ö +q +k +ù +é +f +à +g +p +_ +ï +í +ì +â + +è +r +̱ +ꞌ +t +ñ +á +î +ë +ü +d +n +ó +a +ú +x +b +m +ä +c +z +i +y diff --git a/models/mya/G_100000.pth b/models/mya/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..85f6ccf6ae2d8d164908d3d300f6567a8a3a826c --- /dev/null +++ b/models/mya/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4d30f345871507d5bb9f37bb5b520bf2ea41dab92125edc9257338749bf828 +size 145499143 diff --git a/models/mya/config.json b/models/mya/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mya/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mya/vocab.txt b/models/mya/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..bb81ac28f3652206d1a38e5b7a1c9f727922fbcc --- /dev/null +++ b/models/mya/vocab.txt @@ -0,0 +1,57 @@ +် +ာ +ု +ိ +း +ေ +သ +က +င +တ +့ +မ +ြ +ည +ရ +အ +န +လ +ှ +ပ +စ +ခ +ျ +ူ +ွ +ါ +ထ +ဖ +ံ +ယ +ဆ +ီ +ဲ +ဟ +ဘ +ဝ +္ +ဉ +ဤ +ဇ +ဒ +ဂ +ဦ +ဏ +ဗ +ဓ +ဧ +ဥ +ဩ +ဌ +ဋ +' +ဣ +ဍ +ဿ +ဈ + diff --git a/models/myb/G_100000.pth b/models/myb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e9e60e774fc03cf5c821f0d19d610b11d049e39 --- /dev/null +++ b/models/myb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95f5f4dbc99c15a8a9d75885bf921a882b81a15a82d37d1ff7943d1800837f23 +size 145486057 diff --git a/models/myb/config.json b/models/myb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myb/vocab.txt b/models/myb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5f7a63374a42522a19a9e02804c1ffc172f18dd2 --- /dev/null +++ b/models/myb/vocab.txt @@ -0,0 +1,40 @@ +| +a +n +k +ï +t +d +e +o +g +l +i +m +r +ø +j +u +b +s +' +h +y +ð +w +þ +î +õ +ã +p +é +ë +à +á +ü +ò +- +ó +ú +ù + diff --git a/models/myk/G_100000.pth b/models/myk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c402a7462ffe7dd1089e40371a4afcb36f50dbb --- /dev/null +++ b/models/myk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82da4148498ccba374518e4d62f6cd301193141bbb550ea2f6d84ee521a688b3 +size 145490693 diff --git a/models/myk/config.json b/models/myk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myk/vocab.txt b/models/myk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e52349a6531dccdabdf08f6355be668301c2859b --- /dev/null +++ b/models/myk/vocab.txt @@ -0,0 +1,46 @@ +| +a +i +n +e +ɛ +u +w +y +k +p +o +g +l +b +r +m +s +ɔ +h +t +ɲ +d +f +ᴐ +ŋ +ʼ +j +c +' +z +x +` +è +à +v +á +ù +- +ì +í +é +ò +̀ +́ + diff --git a/models/myl/G_100000.pth b/models/myl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d74e29ed4114e546675e5ecf037248c75122d38 --- /dev/null +++ b/models/myl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52008aa5201aa018d4c8ac7a242d962b5461ec598e09675f7173a154f5196301 +size 145476835 diff --git a/models/myl/config.json b/models/myl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myl/vocab.txt b/models/myl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..47619f591cf99897b5fc63a95bb992d8691fe348 --- /dev/null +++ b/models/myl/vocab.txt @@ -0,0 +1,28 @@ +k +b +d +– +n +t +c +z +h +j +y +u +i +m +l +p +g +e +' +- +o +a +_ +w +s + +r +f diff --git a/models/myv/G_100000.pth b/models/myv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0248f77e348dad41b2895adfbaad6d226e106329 --- /dev/null +++ b/models/myv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6fa37ec605861eaf1ab51a9b37724b63b975ebd15c019428aefc60a2552b69b +size 145482979 diff --git a/models/myv/config.json b/models/myv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myv/vocab.txt b/models/myv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9a01a8dbd1388544108b4a9bb8a5d410537d6de5 --- /dev/null +++ b/models/myv/vocab.txt @@ -0,0 +1,36 @@ + +ш +м +ц +ф +т +_ +ы +к +ж +р +о +ё +ч +е +и +у +с +ь +э +а +в +н +з +л +й +г +щ +я +п +ю +д +- +х +б +– diff --git a/models/myx/G_100000.pth b/models/myx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8dc91b772916533ac1901d9097f8c719853822d4 --- /dev/null +++ b/models/myx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8604785ff97af33b6c9196f7deeeec9f5de46caa0741c35d9ccd219480947171 +size 145479917 diff --git a/models/myx/config.json b/models/myx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myx/vocab.txt b/models/myx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a6713d28c4809cd81b4db2efdf92a4dab051a6d8 --- /dev/null +++ b/models/myx/vocab.txt @@ -0,0 +1,32 @@ +| +a +i +u +e +n +b +k +l +o +s +h +w +m +y +t +r +d +g +f +' +p +ŋ +j +z +v +- +0 +c +4 +1 + diff --git a/models/myy/G_100000.pth b/models/myy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9d8e96625f6a2afe32e409d45dad1ceddfde6b3 --- /dev/null +++ b/models/myy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ae8a03313f7cc7dd1a40204fbde3ff2ba1feb6e69e1ba9ea30c89570f5bef0 +size 145490679 diff --git a/models/myy/config.json b/models/myy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/myy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/myy/vocab.txt b/models/myy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..18088e4c21a4953747e65685d3fc0269d90be6fb --- /dev/null +++ b/models/myy/vocab.txt @@ -0,0 +1,46 @@ +̃ +g +b +õ +ñ +z +‍ +é +ʉ + +a +p +w +u +f +r +e +ũ +x +m +n +— +l +́ +ú +y +_ +ẽ +k +v +ó +t +q +s +' +ü +j +ã +h +d +o +c +i +ĩ +á +í diff --git a/models/mza/G_100000.pth b/models/mza/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e8fae0738659bf57e9354a5cafdd90cd074cedc8 --- /dev/null +++ b/models/mza/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66af95e8229831457ca48c19bbe817765fec44caeb45f3c77b495f665443cd4b +size 145493745 diff --git a/models/mza/config.json b/models/mza/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mza/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mza/vocab.txt b/models/mza/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cbf887d1e1f16200a085e18c9c20f068d7064f8d --- /dev/null +++ b/models/mza/vocab.txt @@ -0,0 +1,50 @@ +| +a +n +i +u +' +t +k +d +o +r +c +h +ñ +m +v +y +e +j +à +s +ù +ì +ā +ī +g +l +ü +̱ +x +ö +í +ū +p +á +ē +b +é +ú +ï +è +ä +ó +f +ō +z +ò +q +ë + diff --git a/models/mzi/G_100000.pth b/models/mzi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..820c5a1fe7a086736b9546ce1136f33b4b29a85a --- /dev/null +++ b/models/mzi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dec236c163fbcaa59d65f790be49daafe150c13d103963b8e81a5e4ba88ff96 +size 145482230 diff --git a/models/mzi/config.json b/models/mzi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mzi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mzi/vocab.txt b/models/mzi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..78863efa3271812b63379a66a6d8b3d81ebd9dca --- /dev/null +++ b/models/mzi/vocab.txt @@ -0,0 +1,35 @@ +i +g +ó +a +ú +ñ +o +c +k +ń +̱ +h +y +ṉ +s +á +f +u +' +é +d +r +n +b +– +t +j +m +_ +p + +x +í +l +e diff --git a/models/mzj/G_100000.pth b/models/mzj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..84050cdd80e8b6ff64d12e0c3509856cdf355683 --- /dev/null +++ b/models/mzj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ba7d13f525e9e3bdf0c387fa8b6f214870fe13887c93b123d564b34e101f22 +size 145480703 diff --git a/models/mzj/config.json b/models/mzj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mzj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mzj/vocab.txt b/models/mzj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ac71a218b34dab0e9f8da824357aea0445f388f --- /dev/null +++ b/models/mzj/vocab.txt @@ -0,0 +1,33 @@ +m +ɔ +o +l +s +t +i +ɛ +z +̀ +v +' +ǹ +d +a +h +à +w +́ +e +y +b +p +n +u +ǃ + +f +g +ɲ +k +j +_ diff --git a/models/mzk/G_100000.pth b/models/mzk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4872e3955dbe6fa791d70ad4b48be23e090d9473 --- /dev/null +++ b/models/mzk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42f87b3c0fe801e3f11d999e2d277b82eae0168c0c9e76f5bc0d7f676c904f57 +size 145486847 diff --git a/models/mzk/config.json b/models/mzk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mzk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mzk/vocab.txt b/models/mzk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e08523a0ea98f8753e7598c1a39a1f25292c75d8 --- /dev/null +++ b/models/mzk/vocab.txt @@ -0,0 +1,41 @@ +| +h +e +n +a +l +b +i +o +u +d +é +à +m +s +g +k +t +j +r +w +- +ò +è +y +c +á +ú +p +v +ó +ì +f +í +ù +' +z +x +1 +2 + diff --git a/models/mzm/G_100000.pth b/models/mzm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d816068cecc31c296cd7c6bb2f875ab44b34211 --- /dev/null +++ b/models/mzm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dd6df912d2762e1821734b5b2bb3003fd084d031ce85da6cfb8fb79f67546ed +size 145499125 diff --git a/models/mzm/config.json b/models/mzm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mzm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mzm/vocab.txt b/models/mzm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2674ee8609ab651f8b6f82cf9e6ea9a06f3ef292 --- /dev/null +++ b/models/mzm/vocab.txt @@ -0,0 +1,57 @@ +| +a +n +ɔ +ɛ +g +i +u +k +ɓ +r +t +e +s +z +y +l +p +w +o +d +b +h +m +v +ã +̃ +j +ì +̀ +́ +à +á +ũ +f +í +è +ù +é +ú +ñ +ò +ǎ +ń +ó +' +õ +ĩ +ǹ +ṹ +ǐ +̌ +ŏ +̆ +ň +- + diff --git a/models/mzw/G_100000.pth b/models/mzw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff357659afea2aa9ff69bfa5d6c6848115331643 --- /dev/null +++ b/models/mzw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:025fb49ebe87e3fd9ce12cd8b73b7031c3b3fb7fc46d5d9e95f8b393f8c481e1 +size 145486047 diff --git a/models/mzw/config.json b/models/mzw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/mzw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/mzw/vocab.txt b/models/mzw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..86edc5726e4e4867022de1c8f98c24fcdc76aa3a --- /dev/null +++ b/models/mzw/vocab.txt @@ -0,0 +1,40 @@ +g +c +ń +ó +ŋ +- +k +é +ú +r +b +m +ô +́ +p +o +_ +s +â +d +j +e +l +v +u +w +h +' +n +ɛ +t +y +6 +å +a +f + +i +á +ɔ diff --git a/models/nab/G_100000.pth b/models/nab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..62efd6d0d5d1a22ef03fa179b2d3b62c2af5d038 --- /dev/null +++ b/models/nab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4754d0e0604918a9186a6f5e307b321c5c13bafadf94b84da2c16edb0dd983 +size 145486821 diff --git a/models/nab/config.json b/models/nab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nab/vocab.txt b/models/nab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbb7d753055de5a6c9108dc10883c8ca0af7ed3c --- /dev/null +++ b/models/nab/vocab.txt @@ -0,0 +1,41 @@ +v +' +6 +0 +k +u +j +i +ĩ +8 +7 +ẽ +̱ +m +e +a +w +— +2 +4 +f +t +ũ +s +õ +_ +y +n +l +r +1 +o +5 +3 +9 +x +h + +p +b +ã diff --git a/models/nag/G_100000.pth b/models/nag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d78289e54246510114f3a1e9a87bb80ee035b550 --- /dev/null +++ b/models/nag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc3dc65c91d98f173195a550b385e11a026e5a8b81bf50b0f048eba5a8c4591 +size 145483755 diff --git a/models/nag/config.json b/models/nag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nag/vocab.txt b/models/nag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..97f09b778b38ba5eb2bf5ca0c2d62adab78c4593 --- /dev/null +++ b/models/nag/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +e +o +k +h +n +t +r +u +s +b +l +m +p +g +j +d +c +y +w +- +v +z +f +' +0 +x +4 +q +1 +5 +3 +2 +6 + diff --git a/models/nan/G_100000.pth b/models/nan/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e904e66fc95b9d6c89a2204ba87ac32b25d6dd0 --- /dev/null +++ b/models/nan/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85db914c1c91bdc98d36dac9ffdf3fb47f44d130b5390f5e6a422d8f9e9f17a7 +size 145492333 diff --git a/models/nan/config.json b/models/nan/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nan/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nan/vocab.txt b/models/nan/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dc6abee9ee61779de033b1d31807fc6da190c8f0 --- /dev/null +++ b/models/nan/vocab.txt @@ -0,0 +1,48 @@ +| +h +- +i +n +t +k +g +a +c +s +l +o +ê +̍ +e +â +ó +ō +p +ū +í +ā +á +u +ī +m +͘ +b +à +è +ò +ì +ô +ú +ù +î +ē +j +û +̄ +é +ǹ +̂ +ń +' +ḿ + diff --git a/models/nas/G_100000.pth b/models/nas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c424684aeb465d4a314b3a836b427c6136eeb4c8 --- /dev/null +++ b/models/nas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2482abe9aa1b64293e2f81af865901ea9ef16b847f843d3d7327b36f2350dded +size 145480691 diff --git a/models/nas/config.json b/models/nas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nas/vocab.txt b/models/nas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..683f87d8776d3c46f4299fc6025378bb4cb177cc --- /dev/null +++ b/models/nas/vocab.txt @@ -0,0 +1,33 @@ +a +| +n +o +i +e +' +k +u +t +r +g +m +p +d +- +v +b +s +j +l +0 +– +1 +6 +4 +2 +7 +8 +5 +3 +h + diff --git a/models/naw/G_100000.pth b/models/naw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..744de498fa31006c15572b419fe33c31ccad2ff9 --- /dev/null +++ b/models/naw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d727d28c12d716b8ceedd3aa4a47e725ddf936073537f3e305b36ed07d8c603 +size 145479266 diff --git a/models/naw/config.json b/models/naw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/naw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/naw/vocab.txt b/models/naw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..42a21fd97f2d8d2c85c26d743fe2ae20d09d9cfc --- /dev/null +++ b/models/naw/vocab.txt @@ -0,0 +1,31 @@ +| +a +ɩ +n +ɛ +ʋ +m +k +i +b +ɔ +y +s +t +l +u +f +w +g +e +r +p +o +d +‐ +ŋ +h +' +á +́ + diff --git a/models/nca/G_100000.pth b/models/nca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0b0433345d5d6bcbcbb01f0daf9625e8779770ec --- /dev/null +++ b/models/nca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e551bb99f2c87560cd6d36639cfb306cdaa7735eb2cf2dbcf4039ddc4886afff +size 145489233 diff --git a/models/nca/config.json b/models/nca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nca/vocab.txt b/models/nca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ad6ebebbe7fc441e5bb77314cfd293fb925d4828 --- /dev/null +++ b/models/nca/vocab.txt @@ -0,0 +1,44 @@ +q +g +o +i +f +v +1 +6 +8 +' +ú + +t +d +w +_ +u +n +a +ŋ +ó +j +2 +é +b +l +z +r +e +á +7 +m +3 +0 +y +4 +p +k +s +í +h +5 +9 +- diff --git a/models/nch/G_100000.pth b/models/nch/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f5fdb732cf4703127385fafbd27ecd63d090b60 --- /dev/null +++ b/models/nch/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddabd4ee9d43b6e39239438136847d17e089db73e1247fafddfe3690f5a9cbff +size 145484523 diff --git a/models/nch/config.json b/models/nch/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nch/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nch/vocab.txt b/models/nch/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9f9c26c12e8e061b56b5ca8de35e6a8fc55c4757 --- /dev/null +++ b/models/nch/vocab.txt @@ -0,0 +1,38 @@ +a +| +i +t +u +n +c +e +o +h +m +q +l +j +s +p +y +r +x +z +í +d +ú +b +á +— +é +f +' +g +ó +v +0 +1 +2 +k +4 + diff --git a/models/ncj/G_100000.pth b/models/ncj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a041eb8a5df1a27618bf17b6e957485321fd367 --- /dev/null +++ b/models/ncj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4d68693a8c9bc7703f760ecf6375b755d3be2c3af1c001c7db734c48a2c7c33 +size 145480705 diff --git a/models/ncj/config.json b/models/ncj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ncj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ncj/vocab.txt b/models/ncj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ccec9f5d3aac079ec6f55f027e9b787b6df4bf43 --- /dev/null +++ b/models/ncj/vocab.txt @@ -0,0 +1,33 @@ +| +i +a +n +u +t +h +o +e +c +l +q +m +s +y +p +x +d +z +j +r +ú +b +g +é +á +f +í +v +ó +k +ñ + diff --git a/models/ncl/G_100000.pth b/models/ncl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ce5963dc5290b8f9acb42e8a4c0721271af593ba --- /dev/null +++ b/models/ncl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fce6ccbf3e93b2776796e1d1a44a5b7b212ef27075aeafc4136eb6f3086164b +size 145483787 diff --git a/models/ncl/config.json b/models/ncl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ncl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ncl/vocab.txt b/models/ncl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c660b0322f4fe3c11fc510a8c29df967ffbf8547 --- /dev/null +++ b/models/ncl/vocab.txt @@ -0,0 +1,37 @@ +ꞌ +h +l +v +n +o +m +g +f +z +0 +_ +ú +y +d +é +c +a +x +e +á +b +q +r +k + +p +u +2 +s +ó +j +í +t +i +— +ñ diff --git a/models/ncu/G_100000.pth b/models/ncu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..33a10305dacfc9c9d3625b755ff5b00f103e5f1c --- /dev/null +++ b/models/ncu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca6e867f3feaf73a6cdfd65a8e502601d2610496875f2c7de6bb1030fe78706 +size 145479133 diff --git a/models/ncu/config.json b/models/ncu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ncu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ncu/vocab.txt b/models/ncu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6af2fbca8eeed69100d5e8a3cbd373c3470a1fc7 --- /dev/null +++ b/models/ncu/vocab.txt @@ -0,0 +1,31 @@ +| +̱ +e +a +o +n +ɔ +m +y +k +b +s +r +i +ɛ +ŋ +w +- +u +t +f +p +g +l +d +ò +' +h +ʻ +ʼ + diff --git a/models/ndj/G_100000.pth b/models/ndj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff501bf053f490ca36e0bf72029decd7d787da4a --- /dev/null +++ b/models/ndj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0a8b6df837f9a452f0b50e0d8185eca1be784db8c26c193934dd6e6e4a76661 +size 145476099 diff --git a/models/ndj/config.json b/models/ndj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ndj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ndj/vocab.txt b/models/ndj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fcf012654ebc52f1af6ea4321aaddf80a54be4a5 --- /dev/null +++ b/models/ndj/vocab.txt @@ -0,0 +1,27 @@ + +v +y +l +h +p +o +t +c +i +s +d +m +k +w +' +_ +b +r +z +f +g +u +a +e +n +j diff --git a/models/ndp/G_100000.pth b/models/ndp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0da249c373a800ba7b8f760f37d564f830ee5238 --- /dev/null +++ b/models/ndp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ae39a2b23517c9b00fb9ede9a8542bff8ddd0e2583f4cca99e5ca4956b9cd6 +size 145475333 diff --git a/models/ndp/config.json b/models/ndp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ndp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ndp/vocab.txt b/models/ndp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..64b725a8caf41d814ce3b1e3e095ca2183ba9963 --- /dev/null +++ b/models/ndp/vocab.txt @@ -0,0 +1,26 @@ +| +i +a +o +e +n +k +u +r +m +d +b +' +t +y +l +s +z +g +w +p +h +v +f +- + diff --git a/models/ndv/G_100000.pth b/models/ndv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3794e1135bfc8d1fdc12472dc054b4e3c28f14e8 --- /dev/null +++ b/models/ndv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfdaacd8d7106450737ea1eb58695713ff9bb92194516ddeb819cbe62600adf6 +size 145482217 diff --git a/models/ndv/config.json b/models/ndv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ndv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ndv/vocab.txt b/models/ndv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..19433c0bc98fb38645ff7a5c3afd1c19effb60d4 --- /dev/null +++ b/models/ndv/vocab.txt @@ -0,0 +1,35 @@ +c +m +r +g +o +t +s +b +i +h +- +y +w +d + +í +u +ɗ +ɓ +é +_ +f +ŋ +ë +j +l +a +k +ˈ +n +e +p +ñ +ƴ +ú diff --git a/models/ndy/G_100000.pth b/models/ndy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d3b17fb245f1e1c8aa805d0c554bd39f002b93ba --- /dev/null +++ b/models/ndy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c90d1f6abe1d2aa1268c884296d42cdbee586a20d2684f200f19ed3c3c7e1b +size 145479175 diff --git a/models/ndy/config.json b/models/ndy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ndy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ndy/vocab.txt b/models/ndy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..249aaffc5c90c351556524a66017d3c9a55b75c1 --- /dev/null +++ b/models/ndy/vocab.txt @@ -0,0 +1,31 @@ +u +n +p +̱ +e +g +r + +t +_ +v +h +s +ɔ +b +w +l +ɗ +' +ə +i +y +a +k +f +d +j +o +m +ɓ +z diff --git a/models/ndz/G_100000.pth b/models/ndz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a5b50590d6f1f331258bea9fda55c3b057a2ef2 --- /dev/null +++ b/models/ndz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19f0474e2b4874e085521ec70af135c5ccafebbb6fc07e5e56a70ec1bbd57b94 +size 145495268 diff --git a/models/ndz/config.json b/models/ndz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ndz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ndz/vocab.txt b/models/ndz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c60362392a2f1d386e38a88459b8f7587cfa403a --- /dev/null +++ b/models/ndz/vocab.txt @@ -0,0 +1,52 @@ +| +n +í +d +b +t +a +ì +m +g +á +k +i +à +' +o +ó +e +ò +y +ù +u +j +â +è +l +c +ṛ +w +ú +é +v +z +- +p +î +s +û +ŋ +r +ô +ê +f +0 +1 +4 +2 +— +7 +8 +6 + diff --git a/models/neb/G_100000.pth b/models/neb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..22b1cab51bc5afa541077cf2ec4e3d130e8cb50a --- /dev/null +++ b/models/neb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb7ab95b7ca1926707ff65c6bc583317b10658bdd2512ca946d849c52fb383a +size 145491438 diff --git a/models/neb/config.json b/models/neb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/neb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/neb/vocab.txt b/models/neb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2b9231d2400ea1869f1e5cd301aaec38907f6045 --- /dev/null +++ b/models/neb/vocab.txt @@ -0,0 +1,47 @@ +| +à +n +l +a +ɛ +e +i +k +g +w +á +b +̀ +' +o +ɔ +- +t +é +y +́ +p +ó +m +h +u +è +ò +s +z +d +í +ù +ɩ +ú +ʋ +̂ +f +ì +v +â +ê +î +ô +` + diff --git a/models/new/G_100000.pth b/models/new/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dab1fe8c687d85b1b8368cf8738fa329eaaa5ee1 --- /dev/null +++ b/models/new/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380941bce3e3b080e78c11dc2fdd483431e01e7343f9d0e2d1c0ded9ef5a8027 +size 145504497 diff --git a/models/new/config.json b/models/new/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/new/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/new/vocab.txt b/models/new/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9c398e18843828dfc2602545ef2213e7eaf9dc18 --- /dev/null +++ b/models/new/vocab.txt @@ -0,0 +1,64 @@ +क +स +२ +ो +ढ +े +म +छ +ँ +ठ +ट +ब +१ +ई +ज +ा +ओ +उ +थ +ञ +न +द +अ +ह +र +‍ +ए +ङ +ग +_ +ः +श +ै +० +' +ण +ख +६ +ू +् +ृ +व +झ +- +ं +य +प +फ +ऊ +भ +इ +ड +आ +ि +ध +ौ +च +ु +त +ी +ल + +ष +घ diff --git a/models/nfa/G_100000.pth b/models/nfa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd663538f3f8b7b41fa3b2f38eec7917ff6cb251 --- /dev/null +++ b/models/nfa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a52e31f9c7fbd51a0fd5c150f6d366343adfeb3efef610c76a1491e39dc77654 +size 145476087 diff --git a/models/nfa/config.json b/models/nfa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nfa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nfa/vocab.txt b/models/nfa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c5b61604d6890a1b24f0b5d3ec2c6e6d7dc5f3ec --- /dev/null +++ b/models/nfa/vocab.txt @@ -0,0 +1,27 @@ +c +t +p +_ + +- +e +i +g +' +y +k +r +d +m +s +j +w +n +u +f +l +b +è +h +a +o diff --git a/models/nfr/G_100000.pth b/models/nfr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3d9d8b5698a995c34a169ced921b2bb152ce4b6e --- /dev/null +++ b/models/nfr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b2e5bbda3a1974dca41fd2ac1cb0e20a1c997e6204ce857d4af8e726bf3662c +size 145480026 diff --git a/models/nfr/config.json b/models/nfr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nfr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nfr/vocab.txt b/models/nfr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..423a4a73fc1853c1de091466b858a1c3b654cd7d --- /dev/null +++ b/models/nfr/vocab.txt @@ -0,0 +1,32 @@ +| +a +n +e +i +r +k +u +m +y +ɛ +l +p +o +ↄ +b +w +t +s +ŋ +g +h +f +j +d +c +' +z +v +- +2 + diff --git a/models/nga/G_100000.pth b/models/nga/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d3660da7268e305ba231c4532f505a60ffea471 --- /dev/null +++ b/models/nga/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:957ea8e2a2e5e6328addc126d9a87936104ee6bbbff6392160917eb600157bda +size 145489143 diff --git a/models/nga/config.json b/models/nga/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nga/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nga/vocab.txt b/models/nga/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8d473afba58d1950a7555fc8f8a77fd7c755e065 --- /dev/null +++ b/models/nga/vocab.txt @@ -0,0 +1,44 @@ +u +y +à +̃ +̀ +w +d +́ +ĩ +a +l +ã +t +n +ũ +ɛ +á +ì +k +ú +o +g +' +s +f +é +ò +è +m +_ +r + +- +z +ɔ +ó +i +ù +í +v +h +p +b +e diff --git a/models/ngl/G_100000.pth b/models/ngl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5bf98c31046d24fba6ad186287c7d65f8c8cc881 --- /dev/null +++ b/models/ngl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0150a65df2b3da087da573a857bb278780ec0063446bd51ac25bf78f46d12075 +size 145472979 diff --git a/models/ngl/config.json b/models/ngl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ngl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ngl/vocab.txt b/models/ngl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..964255be98e987923ce787ad730da5f79016fadc --- /dev/null +++ b/models/ngl/vocab.txt @@ -0,0 +1,23 @@ +l +h + +u +_ +- +t +p +y +r +s +i +m +o +c +v +n +a +e +k +' +w +f diff --git a/models/ngp/G_100000.pth b/models/ngp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0292e5060d6f342a3baf1d267a1c3d924b5bcd9b --- /dev/null +++ b/models/ngp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029bef91b32128fde761ef56c92f8dab10e1eca9414aa5286c33c234baa04736 +size 145476099 diff --git a/models/ngp/config.json b/models/ngp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ngp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ngp/vocab.txt b/models/ngp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4a19940ee4e36d966d594d92ae7716a85d944e9d --- /dev/null +++ b/models/ngp/vocab.txt @@ -0,0 +1,27 @@ +' +v +r +b +t +k +u +w +_ +a + +y +m +d +g +l +j +h +z +o +i +c +p +f +e +s +n diff --git a/models/ngu/G_100000.pth b/models/ngu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..552d2f7856c6b7b257f8944c2a0beb5c48029aaa --- /dev/null +++ b/models/ngu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10b2a4d229aa8fa355e4e0c8066a11b78489da4c804bfa02477e9aa98680b39 +size 145482993 diff --git a/models/ngu/config.json b/models/ngu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ngu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ngu/vocab.txt b/models/ngu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c88b60d71baa31f310d92acee43c70147b2cd8e9 --- /dev/null +++ b/models/ngu/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +n +e +u +t +o +j +c +l +h +m +q +s +p +y +x +r +d +z +b +ú +á +í +g +ó +é +f +v +ñ +0 +1 +2 +k + diff --git a/models/nhe/G_100000.pth b/models/nhe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..337747a0457b81b9f0a3da9f53b0074c1c87a7cd --- /dev/null +++ b/models/nhe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c11b02fdfebb365bf161ccccc81245c64e9a03c8bf8e2652c078a96df6514ff +size 145481475 diff --git a/models/nhe/config.json b/models/nhe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhe/vocab.txt b/models/nhe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..94da1bcd06156107324f14cc435c09f726be0f7c --- /dev/null +++ b/models/nhe/vocab.txt @@ -0,0 +1,34 @@ +a +| +i +t +u +n +e +l +c +o +h +q +j +m +p +s +y +x +r +' +z +d +- +b +f +g +v +0 +1 +2 +k +4 +3 + diff --git a/models/nhi/G_100000.pth b/models/nhi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e844bb9a7616ed4a57d4afb5157d7382f2043fae --- /dev/null +++ b/models/nhi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83ec29346978ae5e17eed68702ab10c9657b3379529c4df5835d25d91b1a405a +size 145483780 diff --git a/models/nhi/config.json b/models/nhi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhi/vocab.txt b/models/nhi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..28158450ac4f7d8834c6db47d4c597e54c02cf07 --- /dev/null +++ b/models/nhi/vocab.txt @@ -0,0 +1,37 @@ +| +i +a +n +t +o +c +u +h +l +e +m +s +q +p +y +z +x +d +r +j +ú +í +— +b +é +g +v +f +ó +á +' +ñ +k +­ +- + diff --git a/models/nhu/G_100000.pth b/models/nhu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..526f2e954ce782c870d25b83d379744eac6cf8d3 --- /dev/null +++ b/models/nhu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08e527d2b8b0fd630f51fb533516d238bc1a5d15716a90a84dca745da605a42 +size 145490007 diff --git a/models/nhu/config.json b/models/nhu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhu/vocab.txt b/models/nhu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..42bf9d226a30d3974f9d0bf43ca81bca1bd981d8 --- /dev/null +++ b/models/nhu/vocab.txt @@ -0,0 +1,45 @@ +| +e +ɛ +n +o +b +u +w +i +k +a +y +m +l +v +s +t +f +ŋ +g +d +j +c +ú +ó +' +ù +ô +̂ +̀ +h +́ +ì +î +ò +è +í +p +é +á +à +û +â +ê + diff --git a/models/nhw/G_100000.pth b/models/nhw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d9849d7e8150d270d719f43d98ba470fc74fa83 --- /dev/null +++ b/models/nhw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66082f79f88184cabc8f24b27059a645422f61cbee501e350a347512f3f00d2f +size 145483867 diff --git a/models/nhw/config.json b/models/nhw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhw/vocab.txt b/models/nhw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e600ad22476dbebd27e05f504a005af0e28c0845 --- /dev/null +++ b/models/nhw/vocab.txt @@ -0,0 +1,37 @@ +a +| +i +t +u +n +c +l +e +o +h +m +q +s +p +j +y +r +x +z +í +d +ú +b +á +— +é +f +' +g +ó +v +0 +1 +2 +k + diff --git a/models/nhx/G_100000.pth b/models/nhx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f9d410ddcfefffd37cc6daf93efdb12a8efd32e --- /dev/null +++ b/models/nhx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ab5b1f1785366a3285207628b7265507b9a1c54160437056df633802c7f12b6 +size 145485401 diff --git a/models/nhx/config.json b/models/nhx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhx/vocab.txt b/models/nhx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..738372418f2e8952f9d78437112c26310b420504 --- /dev/null +++ b/models/nhx/vocab.txt @@ -0,0 +1,39 @@ +| +i +a +̱ +n +e +j +k +o +t +m +y +w +s +l +' +á +p +g +c +h +d +r +u +x +z +ó +í +é +ú +‐ +b +f +— +v +q +ñ +3 + diff --git a/models/nhy/G_100000.pth b/models/nhy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5dfb8a24ab9723427655c39caafe094815a19ac0 --- /dev/null +++ b/models/nhy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b932fad8a57ecd35da675479ebcd5d0770474669c5280f337b88c0078626b14b +size 145481355 diff --git a/models/nhy/config.json b/models/nhy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nhy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nhy/vocab.txt b/models/nhy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b7dad277da24749dd3c7746ae59401849668546b --- /dev/null +++ b/models/nhy/vocab.txt @@ -0,0 +1,34 @@ +z +o +h +x +u +ñ +j +t +f +p +y +r +é +c + +a +d +v +n +ó +e +b +á +ú +m +k +w +l +_ +í +i +g +s +q diff --git a/models/nia/G_100000.pth b/models/nia/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..46618ffec4b3b1fb6b919d7471ef73e0dd14ae0a --- /dev/null +++ b/models/nia/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f0415852e2235ed459c19d48d991c85134cd7d9406b241006fd03b5824a49be +size 145481437 diff --git a/models/nia/config.json b/models/nia/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nia/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nia/vocab.txt b/models/nia/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fea122991f9c4040bc67929cc1958fa5c30b4d7a --- /dev/null +++ b/models/nia/vocab.txt @@ -0,0 +1,34 @@ +| +a +i +ö +n +o +u +m +e +l +b +r +d +h +s +' +g +t +w +f +k +z +y +̃ +- +0 +p +j +2 +4 +6 +9 +1 + diff --git a/models/nij/G_100000.pth b/models/nij/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d409e061065406fc3ff36dc41fe377056c9cd302 --- /dev/null +++ b/models/nij/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd7b037a8059dd7ee3a48af00803c28101156516d1cbed47f73de12fb9d6245c +size 145481473 diff --git a/models/nij/config.json b/models/nij/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nij/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nij/vocab.txt b/models/nij/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3e29632c6238a140d530fbc804d3b7e028e646d1 --- /dev/null +++ b/models/nij/vocab.txt @@ -0,0 +1,34 @@ +a +| +n +e +t +i +h +k +u +m +o +l +r +g +s +p +b +j +d +w +y +- +c +' +0 +z +4 +7 +5 +1 +2 +6 +f + diff --git a/models/nim/G_100000.pth b/models/nim/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8891178ac1d123ebaa2e8d3148fcbf33e7c3a994 --- /dev/null +++ b/models/nim/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd822f543a1f3e682150cbdf71f2ca243936ad2c9ceb7479e9632df621b5a53 +size 145479151 diff --git a/models/nim/config.json b/models/nim/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nim/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nim/vocab.txt b/models/nim/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e66b07292c417d3ad357b6a20ccfac6631df41a0 --- /dev/null +++ b/models/nim/vocab.txt @@ -0,0 +1,31 @@ +a +| +u +n +i +ĩ +k +l +e +m +o +g +t +s +ũ +w +p +y +z +d +b +h +j +- +' +f +r +0 +2 +4 + diff --git a/models/nin/G_100000.pth b/models/nin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8f5161edfe5948b51968cd80cc04b9086dbc92b1 --- /dev/null +++ b/models/nin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73a1c5df545a2b7f3e24e030e59efb1f50f59edafc470874c395166f8a13b06 +size 145492233 diff --git a/models/nin/config.json b/models/nin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nin/vocab.txt b/models/nin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..10bda61bba4ce00d5397b57d5758d761c127613c --- /dev/null +++ b/models/nin/vocab.txt @@ -0,0 +1,48 @@ +a +_ +l +h +i +o +á +p +j +s +ù +b +k +g +v +à +y +̱ +ò +n +t +ě +u +î +ā +ú + +z +d +â +e +ɨ +û +é +- +' +ī +̀ +ǎ +ū +è +ì +m +í +r +c +f +w diff --git a/models/nko/G_100000.pth b/models/nko/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..da8bfaf5fa12419b6764bf1883878d693b63e645 --- /dev/null +++ b/models/nko/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da4e2e75a2ebb925633a6c542fb78a3a5c534578ef8832578640dca5a0cce520 +size 145482993 diff --git a/models/nko/config.json b/models/nko/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nko/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nko/vocab.txt b/models/nko/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc4d5e8b4418f4a441c395d3775e3b89599b5af0 --- /dev/null +++ b/models/nko/vocab.txt @@ -0,0 +1,36 @@ +y +p +n +á +ɔ +s +h +ŋ +é +o +í +a +- + +k +r +ɛ +ʋ +— +t +́ +m +ó +e +u +b +g +l +i +ɩ +' +_ +f +w +d +ú diff --git a/models/nlc/G_100000.pth b/models/nlc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..189f84007322f8742e4f185bfca13aaa69c0ff73 --- /dev/null +++ b/models/nlc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29670b8018c251e38b7b81c536d3262559ff9ef490c48e5287e704e020a82903 +size 145481471 diff --git a/models/nlc/config.json b/models/nlc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nlc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nlc/vocab.txt b/models/nlc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3f39ab5b317ad060d700e1e27c7112f619bde4f2 --- /dev/null +++ b/models/nlc/vocab.txt @@ -0,0 +1,34 @@ +4 +e +` +l +o +n +p +f +m +2 +w +k +d +u +h +i +3 +0 +_ +y +c +a +z +j +t +' +s +b + +r +- +v +1 +g diff --git a/models/nld/G_100000.pth b/models/nld/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c0f8442f4b9be63ec1de8fcd423e965d723871e --- /dev/null +++ b/models/nld/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b09e9917b07f06dd911045c8fc8738594b4c4d65c55223c46335093a4904816 +size 145486855 diff --git a/models/nld/config.json b/models/nld/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nld/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nld/vocab.txt b/models/nld/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..632977b9b1773234c5e84a81a34813456f89aac6 --- /dev/null +++ b/models/nld/vocab.txt @@ -0,0 +1,41 @@ +g +s +- +– +ó +i +_ +q +v +w +j +h +ü +y +è +m +z +ï +b +é +n +u + +k +l +ë +í +o +p +c +á +r +a +f +ä +d +ú +' +t +x +e diff --git a/models/nlg/G_100000.pth b/models/nlg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e81c33b43ec57737acc3f52c23d8081847d274ac --- /dev/null +++ b/models/nlg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228e2bc94a88f604c3a0298eadc81c2de22818d1d37a014fd762fd00c6d6cc10 +size 145478361 diff --git a/models/nlg/config.json b/models/nlg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nlg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nlg/vocab.txt b/models/nlg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..45855c3d70d8438d3d9765fddf8dedb8d918d10b --- /dev/null +++ b/models/nlg/vocab.txt @@ -0,0 +1,30 @@ +r +d +- +p +v +j +o +f +e +i +q +g +n +c +u +k +w +t +h +b +s +m +z + +_ +a +' +y +x +l diff --git a/models/nlk/G_100000.pth b/models/nlk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f8b203eb52979fb9d2b621c31c26853eb93d6896 --- /dev/null +++ b/models/nlk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fdb36049f88a34f9fb784c2a96ea65e5a80d562bf5e426d6a5041b6d217b4a0 +size 145483021 diff --git a/models/nlk/config.json b/models/nlk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nlk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nlk/vocab.txt b/models/nlk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..53fcfb99a7907bbc57f31f9d4be7903abda7cc51 --- /dev/null +++ b/models/nlk/vocab.txt @@ -0,0 +1,36 @@ +m +c +0 +z +5 +t +r +s +j +o +i +f +g +e +d +6 +u +w + +n +p +2 +9 +k +a +4 +3 +_ +y +l +b +8 +1 +7 +h +- diff --git a/models/nmz/G_100000.pth b/models/nmz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f1e10a80bed055decc276ba8ab87031175815cb --- /dev/null +++ b/models/nmz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:660abb0464eeccae5882a52f5b18230d849f2daf9feef0b3f305a8c4ae43899a +size 145489147 diff --git a/models/nmz/config.json b/models/nmz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nmz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nmz/vocab.txt b/models/nmz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..14842e31670f2954ad222b3b667d95a336d7286e --- /dev/null +++ b/models/nmz/vocab.txt @@ -0,0 +1,44 @@ +s +m +ù +g +– +y +c +r +f +e +' +k +a +ɔ +1 +v +j +u +_ +ú +i +ì + +d +í +à +ɛ +- +w +p +` +o +2 +ĥ +l +á +t +ɦ +́ +n +ŋ +̈ +h +b diff --git a/models/nnb/G_100000.pth b/models/nnb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..94f1c454a62eb4cdef2ee0cb970506d0cd0d1837 --- /dev/null +++ b/models/nnb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899e7aa60409197474f96da008acda4c69a521ce26e41d6a232f7d8c64b41dcd +size 145482229 diff --git a/models/nnb/config.json b/models/nnb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nnb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nnb/vocab.txt b/models/nnb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a75a7d1368f11b48b4373909f888a01566a3f105 --- /dev/null +++ b/models/nnb/vocab.txt @@ -0,0 +1,35 @@ +a +| +n +e +o +b +y +m +k +i +u +ĩ +r +w +s +l +g +t +ũ +ʼ +h +d +z +p +- +f +­ +0 +v +1 +j +c +2 +3 + diff --git a/models/nnq/G_100000.pth b/models/nnq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..66604713ccb1b48a111ed3595a15cf1dc655f22a --- /dev/null +++ b/models/nnq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6a02d53a03b9e212901b75559ed5913224804b820bcbba3343162a1d795b989 +size 145475329 diff --git a/models/nnq/config.json b/models/nnq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nnq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nnq/vocab.txt b/models/nnq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2d70537bd7a42ef5c20d6dfe49ba3b78b20deb82 --- /dev/null +++ b/models/nnq/vocab.txt @@ -0,0 +1,26 @@ +a +| +n +e +i +u +k +o +g +m +b +l +w +t +j +p +d +h +y +c +s +' +f +z +v + diff --git a/models/nnw/G_100000.pth b/models/nnw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f8ff2aa2d5948094c97c1d1b8fd03aec23a58ea --- /dev/null +++ b/models/nnw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d2eb10bab03775ab6f980251652a433df80daf26ea9f2a2ceee657883197b3 +size 145496791 diff --git a/models/nnw/config.json b/models/nnw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nnw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nnw/vocab.txt b/models/nnw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..faec2dd9ac03810d60a1df7241800d8494af9c04 --- /dev/null +++ b/models/nnw/vocab.txt @@ -0,0 +1,54 @@ +| +ɑ +ə +n +ɩ +ʋ +̀ +t +́ +w +y +b +d +k +i +m +l +s +r +u +z +p +o +ɡ +c +ŋ +ɛ +ì +a +j +e +í +f +ń +- +á +ú +v +ù +ý +à +' +g +ɔ +ò +1 +h +ó +è +é +0 +̌ +2 + diff --git a/models/noa/G_100000.pth b/models/noa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..82dd8bb8a6638fa98a26154c18f446733cd1ae9b --- /dev/null +++ b/models/noa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c8e50871c463001ccd47201b30da7fc3f88bf72fdc257c3bd5b8967d5d9486a +size 145492233 diff --git a/models/noa/config.json b/models/noa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/noa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/noa/vocab.txt b/models/noa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a186571bb0369e5178cd9db94c5d6bac5c020ee5 --- /dev/null +++ b/models/noa/vocab.txt @@ -0,0 +1,48 @@ +a +| +h +i +m +r +u +ʌ +e +j +' +n +g +c +p +k +d +t +w +b +o +ã +s +ë +ö +ĩ +̈ +ẽ +õ +ũ +l +̃ +y +á +ú +í +— +ä +é +f +v +ó +z +ñ +́ +q +x + diff --git a/models/nod/G_100000.pth b/models/nod/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a347d8b936c7e51230c0105bf45dfe93999b3368 --- /dev/null +++ b/models/nod/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:453c7e292ddd2a52fabd941dc12f1320e1e38a46a9400d58b639f2b16f8ae3ce +size 145506053 diff --git a/models/nod/config.json b/models/nod/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nod/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nod/vocab.txt b/models/nod/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50de080d527bf39f50992236100c2c4551091196 --- /dev/null +++ b/models/nod/vocab.txt @@ -0,0 +1,66 @@ +า +้ +น +อ +เ +่ +ต +ก +ง +ย +ั +ม +| +ห +ี +ว +จ +ร +ะ +บ +ู +๋ +ค +ป +ข +ด +ื +ิ +ล +ส +พ +แ +ไ +ใ +ฮ +์ +็ +ผ +โ +ึ +๊ +ุ +ํ +ถ +ญ +ซ +ท +ธ +ษ +ฟ +ณ +ศ +ภ +ฝ +ช +ฐ +ฆ +ฉ +ฤ +ฏ +ฎ +ฒ +ฑ +' +ฬ + diff --git a/models/nog/G_100000.pth b/models/nog/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..929ee016591f622655548728ad21f182465d1bda --- /dev/null +++ b/models/nog/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37aeef0d66a17c9f3eb51d7455df48737ab0445dd35dbade1637a04de987e17 +size 145487604 diff --git a/models/nog/config.json b/models/nog/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nog/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nog/vocab.txt b/models/nog/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9dc6089417b4d29a81ceaac762cd6ec21431926c --- /dev/null +++ b/models/nog/vocab.txt @@ -0,0 +1,42 @@ +ф +и + +2 +х +с +ъ +э +л +у +– +0 +в +ш +к +о +г +- +р +п +н +т +1 +й +6 +ы +ь +3 +б +д +ю +е +4 +а +ц +м +' +ч +з +я +ж +_ diff --git a/models/not/G_100000.pth b/models/not/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5ddd45d3f99654da8c5559e80c5cda1c38c6cb8a --- /dev/null +++ b/models/not/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:791e3d34e687f10bcb2e947fc1ecea14710a8165ee450935d0501f455f964904 +size 145486719 diff --git a/models/not/config.json b/models/not/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/not/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/not/vocab.txt b/models/not/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d10095c083bb574ef019bdc25964a1ca1834fcb6 --- /dev/null +++ b/models/not/vocab.txt @@ -0,0 +1,41 @@ +0 + +8 +á +6 +c +ó +b +p +l +1 +q +v +2 +j +r +— +ë +m +t +o +d +' +n +3 +é +9 +s +h +í +7 +z +5 +i +y +a +e +4 +g +u +_ diff --git a/models/npl/G_100000.pth b/models/npl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..60c0aa8ecdc8f6873958fe84af2ae2748d61372a --- /dev/null +++ b/models/npl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20874eb5e7dfcbc58b0714126b6178250176b376c07362dbdc381eb898d51d53 +size 145483783 diff --git a/models/npl/config.json b/models/npl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/npl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/npl/vocab.txt b/models/npl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..371f051645208203d5d1e2be6b39eff47b5cbe8a --- /dev/null +++ b/models/npl/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +k +o +t +n +e +l +h +m +s +w +j +c +p +y +r +u +d +x +ó +í +á +ú +é +b +— +g +f +v +z +q +ñ +' +- + diff --git a/models/npy/G_100000.pth b/models/npy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..13a64bcaf8ec6c6e213eba8436a18ad9adbda1dd --- /dev/null +++ b/models/npy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c638d76b45b4bbb54ed025fb0bf5faaa1ab623afdde3c37756cb608d34516828 +size 145475297 diff --git a/models/npy/config.json b/models/npy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/npy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/npy/vocab.txt b/models/npy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..576a034cd5e45c34dace6ba48055dfee0e1f0205 --- /dev/null +++ b/models/npy/vocab.txt @@ -0,0 +1,26 @@ +a +| +i +n +u +o +e +m +l +p +h +t +k +r +d +g +b +s +w +y +- +' +j +f +c + diff --git a/models/nst/G_100000.pth b/models/nst/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9963629204f4c46e78d76f2317bd1fe28fd809a7 --- /dev/null +++ b/models/nst/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb711e5f07cd12127d9127e56d2291fce9fa53a8168e8384bb90c45bc488e032 +size 145476227 diff --git a/models/nst/config.json b/models/nst/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nst/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nst/vocab.txt b/models/nst/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..96d1178ea5bb9296a62b0503e5aa0744a83d81d3 --- /dev/null +++ b/models/nst/vocab.txt @@ -0,0 +1,27 @@ +h +ü +p +s +i +d +a +y +j + +z +t +' +ā +v +n +l +k +b +m +o +g +e +c +_ +r +u diff --git a/models/nsu/G_100000.pth b/models/nsu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6c12b14b6aaeeb4662bee767a2e6f15ba5f9932 --- /dev/null +++ b/models/nsu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70855d59df740d8f7e654bbf38b7f7c757afc194127b8eea3d7ceeeb3c1f113f +size 145486715 diff --git a/models/nsu/config.json b/models/nsu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nsu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nsu/vocab.txt b/models/nsu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b743472c2077e29b051f983026c464366df717a6 --- /dev/null +++ b/models/nsu/vocab.txt @@ -0,0 +1,41 @@ +q +1 +i +ó +y +l +a +e +2 +— +í +m +p +4 +j +s +z +d +h +b +u +c +r + +v +w +k +n +t +ñ +0 +_ +5 +g +á +x +6 +é +f +o +ú diff --git a/models/ntm/G_100000.pth b/models/ntm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2be00870b3d00338e2757bfca2f4958ca90adc2c --- /dev/null +++ b/models/ntm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdd2ed3f2688c4896931bb504150f95f4c32eda600409e18348e800205a62853 +size 145487613 diff --git a/models/ntm/config.json b/models/ntm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ntm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ntm/vocab.txt b/models/ntm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..175a09ef1ff69b06101e61bc328e0f932ab06993 --- /dev/null +++ b/models/ntm/vocab.txt @@ -0,0 +1,42 @@ +ù +ǹ +e +a +í +ṵ +ɔ +̰ +ɛ +̀ +c +n +- +k +à +o +p +y +f +ḿ +b +́ +è +ú +w +i +ó +ḭ +t +' +u +á +s +é +ń +_ +ì +d + +h +m +ò diff --git a/models/ntr/G_100000.pth b/models/ntr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0950c70d0152a19ac0c2cf2d1faeffd3e3feda5 --- /dev/null +++ b/models/ntr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2067d29973331ebcde9a34e4af633fe74886f5d1e8916e60f2fab97c43b0dc0f +size 145476822 diff --git a/models/ntr/config.json b/models/ntr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ntr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ntr/vocab.txt b/models/ntr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3f3866bb3eba0e53d7bda88d3774ec517da65129 --- /dev/null +++ b/models/ntr/vocab.txt @@ -0,0 +1,28 @@ +| +a +e +n +ɛ +o +ŋ +ↄ +k +y +b +i +m +l +u +d +t +w +s +r +g +p +f +' +h +3 +0 + diff --git a/models/nuj/G_100000.pth b/models/nuj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e1a9bf4e3f9fc0e7d2d44cd97441072d00004c7 --- /dev/null +++ b/models/nuj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50e5884b9038113faf697c3a4275d1d4b3911de1abb2d50bffd4babc3fe10d8 +size 145478385 diff --git a/models/nuj/config.json b/models/nuj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nuj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nuj/vocab.txt b/models/nuj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c1dce24d9f972e5228395a01c969d287ee7cf312 --- /dev/null +++ b/models/nuj/vocab.txt @@ -0,0 +1,30 @@ +m +j +p +a +v +s +l +_ +w +f +y +k +r +d +b +e +n +' +u +- +ŋ +z +i +g +o +h + +c +ʼ +t diff --git a/models/nus/G_100000.pth b/models/nus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..094ebe7279faed99c09df27b4a932aee65018562 --- /dev/null +++ b/models/nus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6e516f94cfcb29bb93b4f5fca025449dcedc43fef75e34654c82ba1a91b7c4 +size 145481475 diff --git a/models/nus/config.json b/models/nus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nus/vocab.txt b/models/nus/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8f7feaf73a34c91ae14f597afd7d88e15e357928 --- /dev/null +++ b/models/nus/vocab.txt @@ -0,0 +1,34 @@ +| +ɛ +k +i +a +n +t +̱ +ä +u +c +ɔ +l +r +m +h +d +e +y +j +ö +ŋ +b +ë +o +ɣ +w +p +g +- +̈ +' +1 + diff --git a/models/nuz/G_100000.pth b/models/nuz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..99a9a69dc99e62c740c6d0f711d8f46c0ade6328 --- /dev/null +++ b/models/nuz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817b6f117bb5526521243e15166df9304be4cb0f07b42453ad4862fa8f851f2f +size 145481445 diff --git a/models/nuz/config.json b/models/nuz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nuz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nuz/vocab.txt b/models/nuz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7a918b7972e9d75245aa5bc3309b8378280bbd49 --- /dev/null +++ b/models/nuz/vocab.txt @@ -0,0 +1,34 @@ +í +u +e +r +z +_ +é +t +g + +f +ú +s +l +h +p +i +o +q +c +a +ñ +á +x +k +m +ó +— +j +b +y +n +v +d diff --git a/models/nwb/G_100000.pth b/models/nwb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..10a1cea601f68cda4b4d46051157f70059b3cb6c --- /dev/null +++ b/models/nwb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7490baad5a04a4d471e52ec49d082134d1e9ad8d6cb80d52ad638679ee0f11 +size 145480705 diff --git a/models/nwb/config.json b/models/nwb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nwb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nwb/vocab.txt b/models/nwb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d0285cba42a4a1bfe1cb93841a9ee42e30216300 --- /dev/null +++ b/models/nwb/vocab.txt @@ -0,0 +1,33 @@ +| +' +a +- +n +e +l +ɔ +ʋ +i +ɩ +y +k +ɛ +b +m +o +u +g +h +d +z +t +p +w +s +r +f +j +c +v +q + diff --git a/models/nxq/G_100000.pth b/models/nxq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5d3b58696521462dc572dcba0f7010ea8e10ac7 --- /dev/null +++ b/models/nxq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:691fe42931750e5f2d7a7f928ad069367f1215334b5299dcc9baeb2f11eebd20 +size 145477728 diff --git a/models/nxq/config.json b/models/nxq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nxq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nxq/vocab.txt b/models/nxq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..402713b2492ce3309f368aba77e602615226b269 --- /dev/null +++ b/models/nxq/vocab.txt @@ -0,0 +1,29 @@ +n +b +_ +l +m +u + +' +e +x +r +w +o +p +j +d +a +k +h +z +y +i +g +f +t +v +s +c +q diff --git a/models/nya/G_100000.pth b/models/nya/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..632a735af845e7fa3d2ef70a32759252c864a9a3 --- /dev/null +++ b/models/nya/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c35b30a434da3bb1985b071aebb8ffd27286f91894e1436994fe8a953483c85 +size 145485299 diff --git a/models/nya/config.json b/models/nya/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nya/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nya/vocab.txt b/models/nya/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e72ecf80c1e9c791c5812469c18031a26e459d5 --- /dev/null +++ b/models/nya/vocab.txt @@ -0,0 +1,39 @@ +m +w +i +_ +7 +u +d +z +b +q +p +o +4 +g +v +e +' +c +3 +9 +y +t +5 +ʼ +8 +k +a +2 +s +h +f +r +1 +n +0 +l + +6 +j diff --git a/models/nyf/G_100000.pth b/models/nyf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d42ddd85159416ec27abb2d9ac7fdd52d31f8718 --- /dev/null +++ b/models/nyf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87181581bbb867fc1abda20c1cdd1f496c3e15de71a2e26c3854672cd8b4b42a +size 145477625 diff --git a/models/nyf/config.json b/models/nyf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nyf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nyf/vocab.txt b/models/nyf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c22eddf49f53d9e25c2732e92c3394edf4919e25 --- /dev/null +++ b/models/nyf/vocab.txt @@ -0,0 +1,29 @@ +a +| +i +u +n +k +m +e +h +o +w +r +l +z +d +g +y +s +t +b +' +v +f +j +c +p +̱ +- + diff --git a/models/nyn/G_100000.pth b/models/nyn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1da3e244f926104ec9f984e4c1a0d606caa76d4f --- /dev/null +++ b/models/nyn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f106a0d19391fa290b1c8f84c3930d998a967646a19c04cd7e57b2ac16e6277 +size 145476945 diff --git a/models/nyn/config.json b/models/nyn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nyn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nyn/vocab.txt b/models/nyn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..60cc97283bc91c2ec937d849a741d06c5fbe2238 --- /dev/null +++ b/models/nyn/vocab.txt @@ -0,0 +1,28 @@ +a +| +i +e +u +r +n +o +k +b +m +w +y +g +h +t +s +' +z +d +j +l +p +f +c +- +v + diff --git a/models/nyo/G_100000.pth b/models/nyo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fc01ced289fcb34584e35a52753df16e9e0faac1 --- /dev/null +++ b/models/nyo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9ff6bc68086e3e928c4ec5b04ad4f592494af8a75d83df97773008171fcc3a +size 145476731 diff --git a/models/nyo/config.json b/models/nyo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nyo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nyo/vocab.txt b/models/nyo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..753699284fc68690e6688867f69f2a3e141bd996 --- /dev/null +++ b/models/nyo/vocab.txt @@ -0,0 +1,28 @@ +a +| +i +u +e +n +o +b +k +r +m +w +g +y +t +h +s +z +l +' +d +j +f +p +c +- +v + diff --git a/models/nyy/G_100000.pth b/models/nyy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c06b7246d13361d1d5d9c99cda580be32b9e19d --- /dev/null +++ b/models/nyy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ddbce5bd7fe2f72c8941d24cab5cd104b8451a0f9ff2c3869ff2ec33332434 +size 145489895 diff --git a/models/nyy/config.json b/models/nyy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nyy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nyy/vocab.txt b/models/nyy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7ed4b98997354d2c0c70b6147077e8b5bdaaefa4 --- /dev/null +++ b/models/nyy/vocab.txt @@ -0,0 +1,45 @@ +| +a +u +n +l +k +ị +b +i +m +o +e +g +s +w +t +y +ụ +p +j +ā +̄ +f +d +ō +ū +ī +ē +h +' +ạ +ḳ +ẹ +ọ +r +̣ +ṣ +ṃ +ḷ +v +ḅ +ỵ +ȳ +ẉ + diff --git a/models/nzi/G_100000.pth b/models/nzi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..148c08b5061f92ffcb45ba1f7b60fc145cd17e29 --- /dev/null +++ b/models/nzi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfe94be4bf2ba551095da4b5e53f87a8e086c35b2c21f2fa36332938005beac0 +size 145480799 diff --git a/models/nzi/config.json b/models/nzi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/nzi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/nzi/vocab.txt b/models/nzi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6f39dbf2a8dc50b2e2b682af848a87995ad2f82e --- /dev/null +++ b/models/nzi/vocab.txt @@ -0,0 +1,33 @@ +m +1 +d +_ +c +i +n +- +u +2 +ɛ +f +l +k +h +a +5 +' +g + +y +w +o +z +9 +v +ɔ +b +s +r +p +e +t diff --git a/models/obo/G_100000.pth b/models/obo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..db3f9e0c5656fc099c97584964f55e024fd2720a --- /dev/null +++ b/models/obo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7efd9098d8aeff5ae83317d47408c75f104f52d9550a389989ed9016560e30a +size 145478377 diff --git a/models/obo/config.json b/models/obo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/obo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/obo/vocab.txt b/models/obo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3841c095585a227e7d06fb1c186b685f19026524 --- /dev/null +++ b/models/obo/vocab.txt @@ -0,0 +1,30 @@ +z +v +p +u +' +a +s +m +_ +d +r +h +k +- +b +i +f +g +j +q +w +c +x + +n +y +o +e +t +l diff --git a/models/ojb-script_latin/G_100000.pth b/models/ojb-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e173ac1c90f8ff9502b17a27262d38a137a90ed --- /dev/null +++ b/models/ojb-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa16b032011f8e40207830ad7e523f17dacdcb40d30d3f34fa8022527da9f61 +size 145475429 diff --git a/models/ojb-script_latin/config.json b/models/ojb-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ojb-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ojb-script_latin/vocab.txt b/models/ojb-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d2469d89fe62eba479225272cadb142f50cacf5f --- /dev/null +++ b/models/ojb-script_latin/vocab.txt @@ -0,0 +1,26 @@ +i +a +k +| +n +h +o +t +s +e +w +m +c +' +p +y +- +r +l +– +2 +0 +3 +z +5 + diff --git a/models/ojb-script_syllabics/G_100000.pth b/models/ojb-script_syllabics/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f29163bbb026921f76098c4cbc0d137b67fb7378 --- /dev/null +++ b/models/ojb-script_syllabics/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e5224008833dbd121d34703de6b7d3aea145ec9076b691b0a2cbe4eccfe6f12 +size 145575129 diff --git a/models/ojb-script_syllabics/config.json b/models/ojb-script_syllabics/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ojb-script_syllabics/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ojb-script_syllabics/vocab.txt b/models/ojb-script_syllabics/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..88e4bf90c3cd6f369a884b210cc0729c4eb341a0 --- /dev/null +++ b/models/ojb-script_syllabics/vocab.txt @@ -0,0 +1,156 @@ +| +ᓂ +ᐃ +ᓐ +ᑕ +ᑭ +ᒋ +ᑳ +ᐅ +ᑮ +ᑲ +ᒡ +ᐊ +ᒃ +ᔥ +ᔑ +ᐙ +ᐎ +ᓇ +ᐁ +ᑫ +ᑯ +ᒫ +ᓯ +ᒥ +ᑎ +ᐱ +ᒪ +ᐐ +ᐌ +ᓈ +ᕽ +ᔦ +ᐗ +ᐸ +ᒦ +ᑑ +ᐋ +ᑖ +ᑌ +ᓴ +ᔮ +ᓃ +ᔭ +ᑰ +ᐯ +ᓀ +ᐲ +ᓄ +ᔐ +ᑾ +ᐞ +ᒧ +ᓰ +ᒀ +ᔕ +ᓅ +ᒻ +ᑐ +ᑴ +ᔅ +ᒌ +ᓱ +ᐹ +ᓭ +ᒣ +ᐺ +ᓵ +ᒨ +ᒑ +ᔖ +ᓬ +a +ᕒ +e +ᐄ +x +ᔓ +s +ᐴ +ᑣ +ᔒ +ᐆ +ᒎ +ᐳ +ᑦ +i +r +ᓍ +ᓲ +ᓉ +ᔔ +l +ᑏ +ᒐ +ᑆ +m +h +o +u +p +t +ᐤ +n +ᑸ +j +d +ᔣ +ᒉ +c +ᔂ +ᔫ +ᔨ +ᒞ +ᑉ +ᔗ +ᑗ +ᔩ +b +2 +y +0 +g +3 +ᓶ +1 +v +ᒬ +ᑶ +4 +7 +z +ᔪ +ᐾ +ᓋ +– +ᒸ +ᒶ +ᔛ +k +ᒍ +8 +f +5 +w +ᔀ +9 +6 +ᔾ +ᔡ +ᐼ +ᐧ +ᒁ +q +ᒒ + diff --git a/models/oku/G_100000.pth b/models/oku/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d01acf943d7e2733dfbb0ffee9d1c2e3f6d810bc --- /dev/null +++ b/models/oku/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7f41a7b056c70510e131b38dcf624090551d15d42693d60702f3639f0614b6e +size 145493091 diff --git a/models/oku/config.json b/models/oku/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/oku/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/oku/vocab.txt b/models/oku/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b315a2747ce9c9d77e02ac94e09f3a1c0fae5b5b --- /dev/null +++ b/models/oku/vocab.txt @@ -0,0 +1,49 @@ +| +e +n +ɛ +y +i +s +k +g +l +a +h +ə +b +u +m +o +w +t +f +è +j +d +ŋ +c +́ +̀ +à +á +v +- +ò +̂ +é +ó +î +r +ì +ô +z +í +ú +û +â +ê +ù +1 +3 + diff --git a/models/old/G_100000.pth b/models/old/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9435dc617cafabac1faae9da4fdbcb43dc9e1e16 --- /dev/null +++ b/models/old/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24486bb95a1e26930bd69cc6473eedd9f0e339d83e07e0ed45224f0e6a5b2ae0 +size 145479905 diff --git a/models/old/config.json b/models/old/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/old/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/old/vocab.txt b/models/old/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d29ecae6fe30a323eb607c571816023ca39ab939 --- /dev/null +++ b/models/old/vocab.txt @@ -0,0 +1,32 @@ +| +a +i +n +o +u +k +e +y +w +m +h +l +d +s +f +t +r +c +g +b +ṟ +p +- +j +' +z +v +̱ +ḏ +ḵ + diff --git a/models/omw/G_100000.pth b/models/omw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..eb7994be060b78cc2c984ecb5be15a8a393addfb --- /dev/null +++ b/models/omw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef715d25536a7c25502bc34985ce1c26f747d21571d6b1d3edfb7aa6b739a6d +size 145479053 diff --git a/models/omw/config.json b/models/omw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/omw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/omw/vocab.txt b/models/omw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..48d94f64034ef1fb3e059593eb4caef5dacebcf0 --- /dev/null +++ b/models/omw/vocab.txt @@ -0,0 +1,31 @@ +q +k +4 +6 +7 +9 +- +m +0 +s +u +e +t +a +8 +i + +w +h +p +v +3 +5 +1 +y +o +_ +2 +r +n +f diff --git a/models/onb/G_100000.pth b/models/onb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..542f58ded8ad2a58f7ed701ab66ca6bfeedf21dc --- /dev/null +++ b/models/onb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:424a570145ce14a039dcd9d69992801dfc9f74f160956ba0ef230cb359dea591 +size 145476187 diff --git a/models/onb/config.json b/models/onb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/onb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/onb/vocab.txt b/models/onb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..57c995aa9af735ad52bb20ae9733df31aaf5fd68 --- /dev/null +++ b/models/onb/vocab.txt @@ -0,0 +1,27 @@ + +s +_ +z +y +a +n +v +d +o +m +x +i +h +b +p +f +e +- +u +t +q +— +' +l +k +g diff --git a/models/ood/G_100000.pth b/models/ood/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..01d8e503748ea5e145a188cf98f7c238b7e867b3 --- /dev/null +++ b/models/ood/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a37d473e6d0a15b32f7980da400ad92b85f1ccf89c8f37593e405e4f354dbbc +size 145478399 diff --git a/models/ood/config.json b/models/ood/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ood/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ood/vocab.txt b/models/ood/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24fc3597fc13479af56e23159cfa10762a67e48e --- /dev/null +++ b/models/ood/vocab.txt @@ -0,0 +1,30 @@ +y +h +i +o +l +g +a +' +p +e +b +s +r +x +m +z +f +t +d +w +k +u +j +c +n +v +_ +- + +q diff --git a/models/orm/G_100000.pth b/models/orm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e06f911078a47a7a11f746eb997ba7625be8edd --- /dev/null +++ b/models/orm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f96577b7e7bc59e8eeab688cfa7d91e6134fbbb9f4c53d0b857e83623d4ac29 +size 145477501 diff --git a/models/orm/config.json b/models/orm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/orm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/orm/vocab.txt b/models/orm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71d4b7e35a6b8ca26b95e3a43feb8c4723e858c9 --- /dev/null +++ b/models/orm/vocab.txt @@ -0,0 +1,29 @@ +r +' +- +f +b +n +j +y +t +d +z +m +e +h +u +q +p +a + +_ +o +x +s +w +g +i +c +l +k diff --git a/models/ory/G_100000.pth b/models/ory/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8b24036ca58768eeab198fb4a67a2429232467f --- /dev/null +++ b/models/ory/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcce2f817b83392372b089f7cdf597c9e3c02a3366df488494d6e40c83cc1687 +size 145513575 diff --git a/models/ory/config.json b/models/ory/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ory/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ory/vocab.txt b/models/ory/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..701231bec5476270f6374395dcae3c40e50bcdd8 --- /dev/null +++ b/models/ory/vocab.txt @@ -0,0 +1,76 @@ +ଛ +ୂ +– +ବ +ଞ +ଯ +ଇ +ପ +ନ +ର +ଆ +ଅ +0 +ଠ +4 +ଦ +ଙ +ଋ +େ +ଈ +ୈ +ଘ +ଏ +ଖ +ମ +ଳ +ଃ +1 +ଓ +ଊ +7 +ି +2 +ୃ +ଶ +ଣ +ଥ +ଟ +ଡ +ା +ଁ +ଭ +କ +ୀ +ୱ +' +ଂ +ଫ +ଗ +ଚ +଼ +ତ +5 +_ +ଐ +ଔ +- +ଧ + +ଢ +ଜ +ଲ +ଉ +9 +6 +­ +ୋ +୍ +ସ +ୟ +ଝ +ହ +3 +ୌ +ୁ +ଷ diff --git a/models/oss/G_100000.pth b/models/oss/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bdda9705002bab8800078390329f1aefe10ea086 --- /dev/null +++ b/models/oss/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6cf2de897e74c1bda24ecba0eeb552d0e484c91cf18764f95b209df61af76f +size 145481479 diff --git a/models/oss/config.json b/models/oss/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/oss/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/oss/vocab.txt b/models/oss/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f829245d214e60872699711d3d42bd2c1dd3b9b6 --- /dev/null +++ b/models/oss/vocab.txt @@ -0,0 +1,34 @@ +| +ӕ +ы +а +д +у +н +т +р +м +с +й +х +и +г +з +о +ц +к +л +ф +ъ +б +е +в +п +ч +ж +– +' +- +э +ю + diff --git a/models/ote/G_100000.pth b/models/ote/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c1b522c59975da799a0bf1786cbcaedb3846424 --- /dev/null +++ b/models/ote/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6767b41e23402e2fe57782157c248ecc47ee16c92426f0d47279c37fc7b4a229 +size 145486071 diff --git a/models/ote/config.json b/models/ote/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ote/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ote/vocab.txt b/models/ote/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..016d6d41eb2a0e908aa434092114b766bb22642f --- /dev/null +++ b/models/ote/vocab.txt @@ -0,0 +1,40 @@ +| +a +n +i +' +u +e +h +t +d +r +g +o +̱ +m +ʉ +b +y +j +s +p +á +x +c +ñ +ɛ +ú +ø +í +f +z +q +l +́ +ó +v +é +ü +k + diff --git a/models/otq/G_100000.pth b/models/otq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..da377eed7b9189b472255d7e1c4a4cb64a064e6d --- /dev/null +++ b/models/otq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2640d0695f2dbe2275d92183d6c2339a20642c8ba197482cc6af694fceb51f98 +size 145483866 diff --git a/models/otq/config.json b/models/otq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/otq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/otq/vocab.txt b/models/otq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f295a91d6422eef3334e066e31ad5a2543e6d216 --- /dev/null +++ b/models/otq/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +n +h +̱ +u +' +e +t +m +o +g +r +ö +d +b +y +s +x +j +ñ +p +k +á +w +f +z +l +ú +ë +— +ó +é +í +c + diff --git a/models/ozm/G_100000.pth b/models/ozm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d8fee56035b6e90387802cfd6bb9ad28ed6e0ec --- /dev/null +++ b/models/ozm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bd08775e9aaea64dc62a8c7a304180dae101e65c7514c30d8dc5504437ba958 +size 145502347 diff --git a/models/ozm/config.json b/models/ozm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ozm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ozm/vocab.txt b/models/ozm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d14ee9a24e475d508c512b5f2386ab03f217d26a --- /dev/null +++ b/models/ozm/vocab.txt @@ -0,0 +1,61 @@ +| +b +́ +m +n +e +ɨ +y +é +l +á +ó +a +ɛ +ʉ +s +ʼ +í +̂ +t +w +ɔ +o +ɑ +r +̌ +k +i +j +d +g +p +z +h +â +u +ú +ě +c +î +ŋ +ǎ +ê +û +ô +ø +ǿ +ǐ +ǒ +- +œ +' +f +v +ï +ǔ +q +x +ë +̍ + diff --git a/models/pab/G_100000.pth b/models/pab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c492abc16b4ba14a54d84f98e89fab9ed7faf0d --- /dev/null +++ b/models/pab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0ab90214859d4ec56325b533343756ca181150b42904fc25fdd6e56aea013d +size 145493639 diff --git a/models/pab/config.json b/models/pab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pab/vocab.txt b/models/pab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..394ceb79e0b34783ffbd5e9ebc9ee166689110fa --- /dev/null +++ b/models/pab/vocab.txt @@ -0,0 +1,50 @@ +a +| +e +i +o +h +t +k +n +y +r +x +s +m +l +w +j +u +— +b +d +c +f +p +é +á +ã +- +g +0 +ó +v +q +í +1 +z +2 +ô +4 +7 +5 +ê +ú +6 +3 +8 +9 +â +' + diff --git a/models/pad/G_100000.pth b/models/pad/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e95a4e69eff9c8d08da85f611f6db109e0bf4ff6 --- /dev/null +++ b/models/pad/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b1e18d5c86e335e296854190adf9655c9d3ef4e83b8da007d4a90146690bab +size 145494615 diff --git a/models/pad/config.json b/models/pad/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pad/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pad/vocab.txt b/models/pad/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..55faf67d239a238c09b845c752bbf32b99ec7be8 --- /dev/null +++ b/models/pad/vocab.txt @@ -0,0 +1,51 @@ +a +i +| +n +o +h +k +r +v +d +' +j +b +s +m +e +u +t +- +g +p +f +c +— +l +é +ã +x +á +ó +q +z +í +ô +1 +0 +7 +2 +4 +â +ê +ú +5 +6 +ç +3 +8 +9 +î +õ + diff --git a/models/pag/G_100000.pth b/models/pag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a936b1e3e63b59233bf8f2e212a0cabda263c17c --- /dev/null +++ b/models/pag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2725cc9f98033617423806b5b24ea3f27b726b0e7f2a23f12e72dc1001400d6 +size 145479909 diff --git a/models/pag/config.json b/models/pag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pag/vocab.txt b/models/pag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c5f858ae124447e11c42206ad06a53e4e6f456dd --- /dev/null +++ b/models/pag/vocab.txt @@ -0,0 +1,32 @@ +a +| +n +i +o +t +s +y +e +k +g +l +m +d +p +r +b +u +w +j +c +' +h +- +f +z +— +v +q +x +– + diff --git a/models/pam/G_100000.pth b/models/pam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a507235c6d47888ef3b55a774628d37680576316 --- /dev/null +++ b/models/pam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:035c7c8be0dc87dce612b67d00b7cc396c01f25804f12843b180ae9a0db1b0f6 +size 145480671 diff --git a/models/pam/config.json b/models/pam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pam/vocab.txt b/models/pam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..24bc87974a20d7868f9f2adc75a73036068fa5cc --- /dev/null +++ b/models/pam/vocab.txt @@ -0,0 +1,33 @@ +a +| +n +i +g +u +k +t +l +m +e +s +p +b +y +d +r +o +w +j +h +c +- +f +z +v +' +q +x +— +6 +9 + diff --git a/models/pan/G_100000.pth b/models/pan/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffc1c341747b3771319bd87bcf99d6a1c013e111 --- /dev/null +++ b/models/pan/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed53dce149466fbee906cc88bbbc16175529b098937ff1a9b44700ed8ad4b78 +size 145500643 diff --git a/models/pan/config.json b/models/pan/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pan/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pan/vocab.txt b/models/pan/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..a52e4ea4fcd0bec0bed6fd48dfff8d3b94c51be0 --- /dev/null +++ b/models/pan/vocab.txt @@ -0,0 +1,59 @@ +ਏ +ਟ +ੰ +ੈ +ਠ +- +l +_ +ੜ +਼ +ਇ +ਪ +ਣ +ਥ +ਨ +ਸ +ਯ +ਫ +ਝ +ਊ +ਧ +ੁ +ਿ +ਜ +ਔ +ਦ +ੋ +ੌ +6 +ੂ +ਛ + +ੀ +ਗ +ਬ +੍ +ਐ +ਈ +ਘ +ਭ +ਂ +ਓ +ੇ +ਉ +ਅ +ਖ +ਡ +ਲ +ਆ +ਚ +ਵ +ਤ +ਹ +ਕ +ਢ +ਰ +ਾ +ਮ +ੱ diff --git a/models/pao/G_100000.pth b/models/pao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e03b22dba02fdd4fd8b4820eb95370125232aa5b --- /dev/null +++ b/models/pao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec3cb473f54edc7a1ff069485dcbac8d14eabcebfcdbfdd5304486aa3d49c80 +size 145483753 diff --git a/models/pao/config.json b/models/pao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pao/vocab.txt b/models/pao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b43d98fb6ce57c87178d5f8630d91676e5908b49 --- /dev/null +++ b/models/pao/vocab.txt @@ -0,0 +1,37 @@ +d +f +i +w +n +v +b +3 +m +_ +h +p +7 +l + +1 +ꞌ +q +6 +- +z +2 +x +u +y +j +8 +r +e +g +c +k +a +s +' +t +o diff --git a/models/pap/G_100000.pth b/models/pap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fa37aa4bffb185d18588399a90ffa7cf1ffdd753 --- /dev/null +++ b/models/pap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8641434bbc40fe49ddc59593e4cfe83595bf0ea13e9ced9b208292033909c943 +size 145494515 diff --git a/models/pap/config.json b/models/pap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pap/vocab.txt b/models/pap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dba21eb3d04f7441d51aa11d376eac0499bf9464 --- /dev/null +++ b/models/pap/vocab.txt @@ -0,0 +1,51 @@ +0 +' +f +7 +q +x +g +r +5 +_ +ó +y +4 +h +- +u +e +ù +b +l +n +o +— +z +2 +6 +m +ñ +j +ú +p +1 +t +a +ò +9 +8 +é +w +í +i +d +è +3 +c +s +ü +á + +v +k diff --git a/models/pau/G_100000.pth b/models/pau/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0abc7595c8d1443dc5d0cf43f40943dc738edf7d --- /dev/null +++ b/models/pau/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:718d5051d3cec925abebaac8e4e0090e247b061c38d9ed639cb05fdeb8f87df1 +size 145486083 diff --git a/models/pau/config.json b/models/pau/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pau/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pau/vocab.txt b/models/pau/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9450186ecc41ee684464e3558fd8a2725d6cb204 --- /dev/null +++ b/models/pau/vocab.txt @@ -0,0 +1,40 @@ +| +e +l +a +i +r +m +k +o +u +n +g +d +s +t +h +c +b +j +p +f +0 +y +v +' +x +4 +2 +- +1 +5 +9 +w +3 +7 +6 +— +8 +q + diff --git a/models/pbb/G_100000.pth b/models/pbb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..61d05075236f2ea3dd93cbc4ad5687e55a23390c --- /dev/null +++ b/models/pbb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da318534e425342f21420a96f47bba8a7a5d490ca27dd77a9fa639eac7ef9de +size 145486717 diff --git a/models/pbb/config.json b/models/pbb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pbb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pbb/vocab.txt b/models/pbb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ddd6f0e1d9b6b802a5f17f4f347734d622548af0 --- /dev/null +++ b/models/pbb/vocab.txt @@ -0,0 +1,41 @@ +| +a +' +e +y +s +u +j +t +c +i +n +w +r +p +h +m +ã +v +d +ẽ +ũ +l +ĩ +g +o +á +q +f +z +é +b +í +— +ñ +- +ó +x +ú +k + diff --git a/models/pbc/G_100000.pth b/models/pbc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..838c26caa2a480f83139e1c60c1021eafc809c3b --- /dev/null +++ b/models/pbc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c93c295cd04c414f916e538d62d08a3b450dff618ff66ab110ee118b70b63d +size 145487605 diff --git a/models/pbc/config.json b/models/pbc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pbc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pbc/vocab.txt b/models/pbc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..05b36ae929d81c4e78a5a490e16fe128490332a3 --- /dev/null +++ b/models/pbc/vocab.txt @@ -0,0 +1,42 @@ +| +a +k +p +n +e +o +t +à +i +l +ù +u +m +y +s +w +' +h +c +j +r +d +b +0 +g +1 +z +v +7 +2 +f +4 +5 +3 +6 +x +8 +9 +q +- + diff --git a/models/pbi/G_100000.pth b/models/pbi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1081b74b6c52f793a2e893d410b4bc52e2d9185e --- /dev/null +++ b/models/pbi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a6f4961ef061ca10c8082573af73fb66b5c97e2dcab23d6aaed8b152a890ee +size 145482961 diff --git a/models/pbi/config.json b/models/pbi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pbi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pbi/vocab.txt b/models/pbi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b330cbb9d7498c9b3e212f5e94565e8fdfde223f --- /dev/null +++ b/models/pbi/vocab.txt @@ -0,0 +1,36 @@ +| +a +ə +k +n +d +m +l +t +e +i +s +w +ŋ +b +r +u +z +h +g +ɗ +y +v +ɨ +á +à +f +ɓ +́ +p +j +c +- +̌ +1 + diff --git a/models/pce/G_100000.pth b/models/pce/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43a115877e547923a53319ab7c8eb8f9d1ab1b51 --- /dev/null +++ b/models/pce/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15727662e6609b7862fa896fcced4f11a4ff000dbf308c1e908895ef51ac49b +size 145485287 diff --git a/models/pce/config.json b/models/pce/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pce/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pce/vocab.txt b/models/pce/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..583e1c2d201fae7d61188b56a083e9c9faeb1fe5 --- /dev/null +++ b/models/pce/vocab.txt @@ -0,0 +1,39 @@ +| +อ +า +ี +เ +ด +น +ม +ห +บ +ต +แ +ว +โ +ย +ึ +ั +ู +ง +ร +ก +ฆ +ซ +ป +ฮ +ล +ะ +ฌ +ค +ณ +็ +พ +ื +จ +ท +ช +' +ฟ + diff --git a/models/pcm/G_100000.pth b/models/pcm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a81d350bece0007f1da3e8236ec541c4520054f --- /dev/null +++ b/models/pcm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b8137657db1f2a31a7f25de6d450c8c0f5052959bc540d228fa5a16e2026489 +size 145483006 diff --git a/models/pcm/config.json b/models/pcm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pcm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pcm/vocab.txt b/models/pcm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4e87d6371c0753ba66d09ce6ddb1ebe7537e656c --- /dev/null +++ b/models/pcm/vocab.txt @@ -0,0 +1,36 @@ +| +e +o +a +i +n +d +s +t +r +y +l +m +w +k +u +g +p +f +b +h +v +c +j +- +z +x +0 +' +1 +2 +5 +4 +7 +3 + diff --git a/models/peg/G_100000.pth b/models/peg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c23130ebcca7ff17fb73814fff415ab857e7c9d2 --- /dev/null +++ b/models/peg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20b90e7dc3e06a02e941ae6066110f424e0a89cad3afd0ecb56a8b2813ccd1d9 +size 145484639 diff --git a/models/peg/config.json b/models/peg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/peg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/peg/vocab.txt b/models/peg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f49a5ab790d5f783002eca7f6b3c8869b089e78b --- /dev/null +++ b/models/peg/vocab.txt @@ -0,0 +1,38 @@ +| +ା +୍ +ି +େ +ର +ନ +ତ +କ +ହ +ଜ +ୱ +ୁ +ଗ +ମ +ଂ +ଇ +ଆ +ସ +ଦ +ପ +ବ +଼ +ଚ +ଲ +ଡ +ଙ +ୟ +ଣ +ଟ +ଏ +‍ +ଞ +ଉ +ଅ +ଓ +ଃ + diff --git a/models/pez/G_100000.pth b/models/pez/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..756f73f01ae10e04e05359ea64fca56e6ebc6634 --- /dev/null +++ b/models/pez/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02fe0cc3113d378d95b8243f0243d5bb4409e55d73bc67f543277149728c5f6 +size 145480686 diff --git a/models/pez/config.json b/models/pez/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pez/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pez/vocab.txt b/models/pez/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..562d5b726d75627886aaa793ffc7261f13b6c20a --- /dev/null +++ b/models/pez/vocab.txt @@ -0,0 +1,33 @@ +b +h +p +_ +- +g +l +0 +c +2 +u +n +o +6 + +a +j +i +e +v +1 +w +s +é +r +k +m +4 +y +z +d +t +' diff --git a/models/pib/G_100000.pth b/models/pib/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3ff534a1a1b27ad62b6a2aa1f303ad95a2a2988 --- /dev/null +++ b/models/pib/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39603d2c13dba24efc019333ca4bbb87c648b55ec27175859f431fa08bf9a7e6 +size 145473757 diff --git a/models/pib/config.json b/models/pib/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pib/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pib/vocab.txt b/models/pib/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..30f406e6f3a1c0304b8bfe7543d74a3cfd271b4e --- /dev/null +++ b/models/pib/vocab.txt @@ -0,0 +1,24 @@ +u +j +— +s +n +l +r +e +t +' +k +c +x + +m +a +g +y +p +w +o +i +_ +h diff --git a/models/pil/G_100000.pth b/models/pil/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4deb7b5b8b0c536977d4b349b9a5143cda9b788 --- /dev/null +++ b/models/pil/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbab6f07b7771f856711b6b17d40b68ee17ba47667c57cc41ccfb71c9e6b946 +size 145486081 diff --git a/models/pil/config.json b/models/pil/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pil/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pil/vocab.txt b/models/pil/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9a699ac5028709ffbe97a3946620c9b0ebbe22a9 --- /dev/null +++ b/models/pil/vocab.txt @@ -0,0 +1,40 @@ + +w +ò +f +t +d +v +p +u +g +- +ɛ +z +ə +n +à +c +r +ì +è +ʋ +k +ɣ +b +ɔ +h +̀ +m +l +y +e +i +_ +j +ǝ +o +s +ŋ +a +ù diff --git a/models/pir/G_100000.pth b/models/pir/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..db86c3f797c47fc047b6762bdfde8eeedeb822d0 --- /dev/null +++ b/models/pir/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab368fc6f9a04e2667a351ce81597c990a301f400ce5fcf55779989118a21c83 +size 145489133 diff --git a/models/pir/config.json b/models/pir/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pir/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pir/vocab.txt b/models/pir/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..19d8e8deb2a1b664fd02bba1cee175ea9f25684e --- /dev/null +++ b/models/pir/vocab.txt @@ -0,0 +1,44 @@ +| +a +i +e +r +o +h +ʉ +u +t +n +y +s +c +q +m +j +p +d +̃ +g +ñ +ã +b +w +ẽ +ũ +l +õ +ĩ +— +í +f +v +é +z +á +ú +' +ó +k +x +́ + diff --git a/models/pis/G_100000.pth b/models/pis/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..deb131c412f6baf39344ed1fc96736ed4f02bd93 --- /dev/null +++ b/models/pis/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d78b6b41d80c1c758e3341020d8e814a7b47fa24e5d7c8564cd1f89c38dfef +size 145482999 diff --git a/models/pis/config.json b/models/pis/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pis/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pis/vocab.txt b/models/pis/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..36fcd317e4e247b05c236c2f4fd91d13a1bb212d --- /dev/null +++ b/models/pis/vocab.txt @@ -0,0 +1,36 @@ +| +a +e +o +m +i +n +l +t +s +g +k +f +h +u +b +d +r +p +y +w +v +j +- +0 +' +1 +2 +4 +9 +5 +6 +3 +7 +8 + diff --git a/models/pjt/G_100000.pth b/models/pjt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..502e5b5cc0b561c68bde883c23391c652c48cd2e --- /dev/null +++ b/models/pjt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eeef51000ab36ccfe2f38732b5dc05c0da5fd230d7217ae49327872a55ee9c4 +size 145489925 diff --git a/models/pjt/config.json b/models/pjt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pjt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pjt/vocab.txt b/models/pjt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9a71741b676d501f9f79b7b879a268d154c82130 --- /dev/null +++ b/models/pjt/vocab.txt @@ -0,0 +1,45 @@ +y +d +m +v +c +z +6 +ḻ +4 +— +b +_ +p +' +3 +ṟ +ṉ +9 +5 +– +s +w +8 +h +l +o +g +x +1 +u +- +n +f +ṯ +r + +2 +0 +e +j +t +7 +i +k +a diff --git a/models/pkb/G_100000.pth b/models/pkb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b0d47174faadc1bb316e30f6b6ee83c32b29ed99 --- /dev/null +++ b/models/pkb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac7c802f9f4df91bbed5336dbe95b5a48e3e6b1cb22fe76bc18393c0432c6d6 +size 145482227 diff --git a/models/pkb/config.json b/models/pkb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pkb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pkb/vocab.txt b/models/pkb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..751869886f7702197f62c706225139929fe8c9cf --- /dev/null +++ b/models/pkb/vocab.txt @@ -0,0 +1,35 @@ +a +| +u +i +n +w +e +k +m +y +o +h +s +d +z +t +g +b +̯ +r +v +f +j +l +p +ḍ +c +ḅ +- +0 +1 +4 +2 +' + diff --git a/models/pls/G_100000.pth b/models/pls/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d0f08ba46425ed446438e227a9f8c84d59d32140 --- /dev/null +++ b/models/pls/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c6090004cce04abddc612332e93cef385f0e582baa2c6789f2e46d57f57493 +size 145489925 diff --git a/models/pls/config.json b/models/pls/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pls/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pls/vocab.txt b/models/pls/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2c8d8a794cc12a09a0e3f0a930f4165832d93b68 --- /dev/null +++ b/models/pls/vocab.txt @@ -0,0 +1,45 @@ +7 + +a +6 +4 +t +m +l +s +e +k +- +q +_ +x +b +ꞌ +' +3 +1 +ú +r +z +g +p +v +d +o +u +̱ +í +0 +i +f +ó +y +h +é +c +n +2 +— +ñ +á +j diff --git a/models/plw/G_100000.pth b/models/plw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f4f4e5f84c122667708ddd97d6246a756d9ca0f --- /dev/null +++ b/models/plw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2486a21c713baf78a1f334a42a8798f369d1421a163b1fa973971ede5a46f220 +size 145479015 diff --git a/models/plw/config.json b/models/plw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/plw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/plw/vocab.txt b/models/plw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4cc9696828e3841fdfb4f8cc567e7f6853526ec9 --- /dev/null +++ b/models/plw/vocab.txt @@ -0,0 +1,31 @@ +i +a +3 +t +6 +ʼ +' + +4 +p +1 +- +w +h +n +k +u +y +r +2 +d +g +b +s +e +m +j +_ +l +0 +o diff --git a/models/pmf/G_100000.pth b/models/pmf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ab72b14e1fd2106330f8bcded6ccdf3601220d2 --- /dev/null +++ b/models/pmf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc57129f73811ccfc5b8164562b58af927fc3312dba161e1aba5a851d26646ac +size 145476099 diff --git a/models/pmf/config.json b/models/pmf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pmf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pmf/vocab.txt b/models/pmf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6386f1baecd988aa572c141acaef4895a1179a1e --- /dev/null +++ b/models/pmf/vocab.txt @@ -0,0 +1,27 @@ +a +| +o +i +n +m +e +t +u +k +s +p +r +g +l +y +w +' +b +d +j +h +- +z +c +f + diff --git a/models/pny/G_100000.pth b/models/pny/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1643b6ef35e9c3067f184d1a74b907bb70fe1839 --- /dev/null +++ b/models/pny/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb953761449ea76500297f7108af68fc0f898e95ff7e2c6f37068cab4cf022a5 +size 145497577 diff --git a/models/pny/config.json b/models/pny/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pny/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pny/vocab.txt b/models/pny/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f061ca8f2138a31cb3a97c30c46140faa44a9e57 --- /dev/null +++ b/models/pny/vocab.txt @@ -0,0 +1,55 @@ +| +ə +̀ +n +ɨ +a +m +p +à +t +w +l +h +o +ŋ +u +b +s +ù +ʼ +g +k +y +i +r +z +e +ò +ẅ +d +̂ +è +f +ì +c +â +ô +ǎ +j +û +ě +̌ +ǹ +ǔ +î +ǐ +v +' +ǒ +x +q +- +1 +2 + diff --git a/models/poh-dialect_eastern/G_100000.pth b/models/poh-dialect_eastern/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..609bce3771f3312709ca4ed74a23e8bfbcb3f6a2 --- /dev/null +++ b/models/poh-dialect_eastern/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3089c94934aad82ae5565ed2baa4f2abc233e8986c47beb23ec8bb1ec61a42c +size 145483889 diff --git a/models/poh-dialect_eastern/config.json b/models/poh-dialect_eastern/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/poh-dialect_eastern/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/poh-dialect_eastern/vocab.txt b/models/poh-dialect_eastern/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9c3ba23ee9b6e350b1659430710048d09b5d0823 --- /dev/null +++ b/models/poh-dialect_eastern/vocab.txt @@ -0,0 +1,37 @@ +h +e +p +ó +á +ú +— + +y +_ +l +a +r +u +b +í +f +' +m +v +g +w +é +q +z +- +j +k +i +s +ñ +c +d +x +t +o +n diff --git a/models/poh-dialect_western/G_100000.pth b/models/poh-dialect_western/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..51804a399df57b8a6dc5d06f2d632aefb021202c --- /dev/null +++ b/models/poh-dialect_western/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860ca491285afe3ca5e764fa72d3a4cb66c1ce3de09b9d68e2597f2812c4d3c8 +size 145477601 diff --git a/models/poh-dialect_western/config.json b/models/poh-dialect_western/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/poh-dialect_western/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/poh-dialect_western/vocab.txt b/models/poh-dialect_western/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..20b5816d3a187c5934879750db24ae9321b9ffb8 --- /dev/null +++ b/models/poh-dialect_western/vocab.txt @@ -0,0 +1,29 @@ +| +i +a +ꞌ +c +h +e +r +n +o +j +k +u +l +t +w +̱ +m +b +x +s +q +p +y +d +- +z +— + diff --git a/models/poi/G_100000.pth b/models/poi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2a5b1a30423a77a6a0dcd57a9a1c13e2ce39e502 --- /dev/null +++ b/models/poi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f4fdeb4afe0cff722f5d5ffea3fb6755f04aaed08b60149879c26064ead868 +size 145485393 diff --git a/models/poi/config.json b/models/poi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/poi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/poi/vocab.txt b/models/poi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cf9f9ce431e65ca7cc4ee2a15a1df82a25b44225 --- /dev/null +++ b/models/poi/vocab.txt @@ -0,0 +1,39 @@ +ñ +e +q +r +i +w +x +́ +é +f +_ +b +k +a +̱ + +v +g +d +s +o +y +á +p +m +í +ŋ +l +ó +n +ú +j +z +- +t +ɨ +c +h +u diff --git a/models/pol/G_100000.pth b/models/pol/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..53996893b4a8e22ebed02c03d7e690e8014d6630 --- /dev/null +++ b/models/pol/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d6f4a9de92a6eb15bca8cb01826d8a9938ab6fb2c04a1c13a06d1d170c88ba6 +size 145490647 diff --git a/models/pol/config.json b/models/pol/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pol/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pol/vocab.txt b/models/pol/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ae470fc39a84be58abbf33aca29a961071b2cd7a --- /dev/null +++ b/models/pol/vocab.txt @@ -0,0 +1,46 @@ +w +h + +l +0 +c +m +ą +s +z +u +d +2 +ń +ó +4 +g +f +1 +p +9 +— +e +6 +- +k +b +8 +r +a +5 +_ +ę +y +ł +i +ż +j +o +3 +7 +n +t +ś +ć +ź diff --git a/models/por/G_100000.pth b/models/por/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7592c69eeadc4a0c207c3f4cbdf7f9e3552d8569 --- /dev/null +++ b/models/por/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b45cd2de1d2287d80ae25192b4fd2b58b10840cae91f56f248b74da723afc0 +size 145488372 diff --git a/models/por/config.json b/models/por/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/por/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/por/vocab.txt b/models/por/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..31889ac32737d53b54aa138b481be6befe6d4bd8 --- /dev/null +++ b/models/por/vocab.txt @@ -0,0 +1,43 @@ +à +ú +1 +u +l +2 +h +é +p +ã +x +' +ê +_ +s +ç +4 +v +m +- +g +q +c +z +â +í +t +e +o +i +f +b + +r +ô +n +— +ó +a +j +d +õ +á diff --git a/models/poy/G_100000.pth b/models/poy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a035fb48020cceff83a5568763d24e92dd2e04a --- /dev/null +++ b/models/poy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74fba45cf9bff678267e5d80b0704752eda01b46600347e96c463fb29219c1db +size 145476093 diff --git a/models/poy/config.json b/models/poy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/poy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/poy/vocab.txt b/models/poy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc45d57877c52dbc2a9a7317b630fe51df900504 --- /dev/null +++ b/models/poy/vocab.txt @@ -0,0 +1,27 @@ +u +a +' +t +p +g +n +e +k +y +z +s +_ +f + +r +l +v +i +j +b +m +h +o +c +w +d diff --git a/models/ppk/G_100000.pth b/models/ppk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3f2bae6ebd257b690e1f931ecbdf5f712ade93b --- /dev/null +++ b/models/ppk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56e9d4196a3003ff253f0dd7135f24242acaccd40153c3776cc1735dc374714 +size 145476855 diff --git a/models/ppk/config.json b/models/ppk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ppk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ppk/vocab.txt b/models/ppk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3c1681b9963f8893d213c87875802b4cec4e1e34 --- /dev/null +++ b/models/ppk/vocab.txt @@ -0,0 +1,28 @@ +u +c +_ +y +a +e +s +d +l +b +p +k +f +r +i +t +o +j +g +m + +- +z +– +h +n +w +' diff --git a/models/pps/G_100000.pth b/models/pps/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9016febb85943697ed1f0fe4b9feb2a3698420fd --- /dev/null +++ b/models/pps/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef3dfa7c7597807216935879ba6970481a8e46c6a76443423fa939122251b99c +size 145490663 diff --git a/models/pps/config.json b/models/pps/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pps/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pps/vocab.txt b/models/pps/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f8e8bd17606dc09de49e1614cb97c7acf5add74f --- /dev/null +++ b/models/pps/vocab.txt @@ -0,0 +1,46 @@ +o +' +j +r +1 +t +_ +x +ó +ì +d +` +m +g +i +ú +à +h +u +n + +p +e +9 +l +— +c +ꞌ +k +f +q +ñ +s +z +y +- +b +2 +̱ +í +5 +a +á +v +ù +é diff --git a/models/prf/G_100000.pth b/models/prf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d10ef2fe7e4dcb650d45e7b76d367654021dcda --- /dev/null +++ b/models/prf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748c01bd11401673e9b63408400f6f47eda137b7609797a91d20b4cddde9ff95 +size 145483098 diff --git a/models/prf/config.json b/models/prf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/prf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/prf/vocab.txt b/models/prf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8dc53454c09f3fe3fe1198df7053dc40ecd204d1 --- /dev/null +++ b/models/prf/vocab.txt @@ -0,0 +1,36 @@ +| +a +n +i +t +k +e +u +d +y +m +g +s +p +l +o +á +b +h +r +w +j +- +c +' +f +z +v +q +0 +1 +2 +x +4 +6 + diff --git a/models/prk/G_100000.pth b/models/prk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b058e50c39f1856d0cd6afbd57bf796ab433ddc3 --- /dev/null +++ b/models/prk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a452665800c5c4a72f16cba6d482f331f4420a1fe11410c214f4e068d38dd7a2 +size 145479907 diff --git a/models/prk/config.json b/models/prk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/prk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/prk/vocab.txt b/models/prk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cbac670720ee1a7b7cdd5c2f823319c9b2f1811f --- /dev/null +++ b/models/prk/vocab.txt @@ -0,0 +1,32 @@ +r +j +d +p +h +k +w +- +t +i +6 +e +u +9 +2 +s +n +o +a + +_ +l +m +c +y +v +f +g +x +z +4 +b diff --git a/models/prt/G_100000.pth b/models/prt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..17369311fa3d7f7639038b968bddd77c6f431fd2 --- /dev/null +++ b/models/prt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db1bdced686cfc379e8e975ff99f3a9a8406488e5c9daf014676cc4b56e352a +size 145497579 diff --git a/models/prt/config.json b/models/prt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/prt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/prt/vocab.txt b/models/prt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..437d201f11798cfd9b853e20ee74af5bfcdc28ba --- /dev/null +++ b/models/prt/vocab.txt @@ -0,0 +1,55 @@ +1 +6 +้ +_ +‍ +ร +ญ +อ +ั +ป +พ +ื +น +จ +ฟ +ค +ท +ุ +บ +ไ +ว +0 +ใ +แ +ง +3 +5 +๊ +ะ +ย +9 +4 +า +ี +ก +ฮ +ห +ต + +ิ +ู +็ +ด +8 +ึ +เ +2 +7 +- +ํ +' +ล +โ +ซ +ม diff --git a/models/pse/G_100000.pth b/models/pse/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf0441f167b3837c0617827caf6a4a447c09a9fd --- /dev/null +++ b/models/pse/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf279ec074a21852f849a2b0adea7e264522ace968f498c7e07c506122942c2 +size 145473659 diff --git a/models/pse/config.json b/models/pse/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pse/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pse/vocab.txt b/models/pse/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1eee4de7a8322e21ae66548486e90082d3a091f2 --- /dev/null +++ b/models/pse/vocab.txt @@ -0,0 +1,24 @@ +h +d +j +g +b +i +- +n +c +w +p +' +m +e +a +k +s + +u +r +y +_ +l +t diff --git a/models/pss/G_100000.pth b/models/pss/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a883a29a8ff008c2729c646794523e8a66b04126 --- /dev/null +++ b/models/pss/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c4f14be80c75af32f10518347dda0e804ad0706487f2fb5554132e85bb2894 +size 145473783 diff --git a/models/pss/config.json b/models/pss/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pss/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pss/vocab.txt b/models/pss/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a2824824c78a21736211c471202f96dacb7a6773 --- /dev/null +++ b/models/pss/vocab.txt @@ -0,0 +1,24 @@ +g +t +a +p +o +u +y +n +k +' +s +h +l +d +m +e +– +- +r + +i +w +b +_ diff --git a/models/ptu/G_100000.pth b/models/ptu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f02173f6e25e47daa593e903e546280a2c07e2ea --- /dev/null +++ b/models/ptu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e393fb71b853f8939056f40c21b31de5a96cf0aaf46f72d0273941ac31e3d10 +size 145476851 diff --git a/models/ptu/config.json b/models/ptu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ptu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ptu/vocab.txt b/models/ptu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..81bfdd3534ff1d9c4e5a0ff92d43f50c62c001e9 --- /dev/null +++ b/models/ptu/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +i +u +m +l +o +t +s +' +k +e +p +d +g +b +h +ä +y +- +r +j +w +f +z +c + diff --git a/models/pui/G_100000.pth b/models/pui/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9670681c904008c4ffefb63d0182286b42fb67d6 --- /dev/null +++ b/models/pui/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8245d914b9252674b67af14609ed39a08878ee82897adf0b7047c0954b78db0b +size 145487602 diff --git a/models/pui/config.json b/models/pui/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pui/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pui/vocab.txt b/models/pui/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c28dfad4f2ea9e3b11a78e743fc28b5528722fef --- /dev/null +++ b/models/pui/vocab.txt @@ -0,0 +1,42 @@ +í +0 +ü +á +m +_ +n +d +x +ó +p +a +k +2 +h +j +y +l +b +ú +t +v +ñ +é +ŕ +z +o +w +q +- +4 +e +7 + +i +u +f +c +g +9 +s +r diff --git a/models/pwg/G_100000.pth b/models/pwg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ef03e1686179f3d9ed54cc7c61f3bc017cb5b3d8 --- /dev/null +++ b/models/pwg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:559bbba32019719fb173a645b4bf92d18f8d715896d768b2f4f9bd5f3fad1c42 +size 145484541 diff --git a/models/pwg/config.json b/models/pwg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pwg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pwg/vocab.txt b/models/pwg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a690f3136627040906d2317ad88a6d61c3528d16 --- /dev/null +++ b/models/pwg/vocab.txt @@ -0,0 +1,38 @@ +s +4 +b +e +x +7 +1 +' +d +p +c +9 +6 +w +h +0 +j +u +m + +3 +n +_ +5 +y +t +f +r +g +k +l +8 +v +o +z +i +a +2 diff --git a/models/pww/G_100000.pth b/models/pww/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cbf23a461abac3ecd633d51ee9747f49f4624f99 --- /dev/null +++ b/models/pww/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5bef07e6d665706c8de294a5b08e3039af35d0f8e9f279b96079ed86219f3d2 +size 145490677 diff --git a/models/pww/config.json b/models/pww/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pww/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pww/vocab.txt b/models/pww/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c86d694873312bca0a73c72f885e3d91e770a467 --- /dev/null +++ b/models/pww/vocab.txt @@ -0,0 +1,46 @@ +| +้ +ง +อ +ล +เ +แ +่ +๊ +ี +า +ะ +น +ท +พ +ม +ซ +ก +โ +ว +ู +บ +ฌ +ด +ย +ไ +ค +ช +จ +ื +ุ +ฆ +ิ +- +ึ +ป +ต +ร +ฮ +' +ฟ +ั +1 +ส +ผ + diff --git a/models/pxm/G_100000.pth b/models/pxm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..32440018faed9b9ad7405a967a8be13407026165 --- /dev/null +++ b/models/pxm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f758a12b1070e681fefaabcc565acf386db0ca869931ff125ee1dba616fe16a4 +size 145483767 diff --git a/models/pxm/config.json b/models/pxm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/pxm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/pxm/vocab.txt b/models/pxm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..48f736ccf3ca08681bc6a7baf08db7081bdd160e --- /dev/null +++ b/models/pxm/vocab.txt @@ -0,0 +1,37 @@ +n +e +g +t +w +h +m +l +b +k +_ +o +j +x +d +v +á +ó +f +é +u +í +ú +ñ +z + +c +' +s +ë +p +a +ä +i +y +q +r diff --git a/models/qub/G_100000.pth b/models/qub/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b058ba4f68352c179cc43822e9d86eaf1815a37 --- /dev/null +++ b/models/qub/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6140a3470fb6dc4c877149a99f066f8c74d1e9a39e72edc7d3ee2d1969e9a2 +size 145486933 diff --git a/models/qub/config.json b/models/qub/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qub/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qub/vocab.txt b/models/qub/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cecd43b47d3cf442ae6ea5779977db34119fad71 --- /dev/null +++ b/models/qub/vocab.txt @@ -0,0 +1,41 @@ +a +| +n +c +i +u +h +y +s +r +m +t +p +l +j +g +o +w +ä +e +q +d +ö +b +ñ +ï +ë +- +f +ú +ü +í +é +z +v +ó +á +' +x +k + diff --git a/models/quc-dialect_central/G_100000.pth b/models/quc-dialect_central/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2a193b343f026301e28ea246bc5fdb0d063a06d --- /dev/null +++ b/models/quc-dialect_central/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff1cab07cb2ea55d6db47084078e29e49752aec9771d1aeb906488d7764fb99 +size 145483751 diff --git a/models/quc-dialect_central/config.json b/models/quc-dialect_central/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quc-dialect_central/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quc-dialect_central/vocab.txt b/models/quc-dialect_central/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d943b3558ab17879eec4ff4ad633a879e5a5070e --- /dev/null +++ b/models/quc-dialect_central/vocab.txt @@ -0,0 +1,37 @@ +| +i +a +c +r +u +e +ꞌ +j +n +h +t +o +l +k +x +m +w +b +s +q +ä +p +z +d +y +— +ú +í +g +é +á +ó +v +f +ñ + diff --git a/models/quc-dialect_east/G_100000.pth b/models/quc-dialect_east/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d05d8d7d4ac586007d3997caffe23150ce61881 --- /dev/null +++ b/models/quc-dialect_east/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f45dc81b1b7f857ae507c9d81aa07601c2edb9e37b9fbf0033f5745bd87508a +size 145488391 diff --git a/models/quc-dialect_east/config.json b/models/quc-dialect_east/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quc-dialect_east/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quc-dialect_east/vocab.txt b/models/quc-dialect_east/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3dea07a59512f3316b097b4fea9f1f018dbb89bf --- /dev/null +++ b/models/quc-dialect_east/vocab.txt @@ -0,0 +1,43 @@ +| +a +i +r +k +e +' +u +j +h +c +n +t +l +o +q +w +b +x +m +s +p +y +z +d +g +f +– +v +- +ä +1 +0 +2 +6 +4 +7 +ñ +5 +3 +8 +— + diff --git a/models/quc-dialect_north/G_100000.pth b/models/quc-dialect_north/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2312ac27af7a28958bfdf246a98ac39b9db67f2d --- /dev/null +++ b/models/quc-dialect_north/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75272eb16f0c28eeca1e84ef9682c77bf41c76834bfc0383602572ee66dd40cc +size 145482327 diff --git a/models/quc-dialect_north/config.json b/models/quc-dialect_north/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quc-dialect_north/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quc-dialect_north/vocab.txt b/models/quc-dialect_north/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aa3a33a3dd9bf8963b7d490d6e838ced37d0a8a2 --- /dev/null +++ b/models/quc-dialect_north/vocab.txt @@ -0,0 +1,35 @@ +| +a +' +i +k +j +e +c +h +l +u +n +r +q +o +t +x +m +b +w +s +p +y +z +d +ú +é +g +ó +í +f +v +á +ñ + diff --git a/models/quf/G_100000.pth b/models/quf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf8330daf31b0764ce9f91401b27a597e3c9edcf --- /dev/null +++ b/models/quf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085e28ed9151211d9763d03f5faf48c4a393384981c82257ef7f133b25b46aae +size 145482235 diff --git a/models/quf/config.json b/models/quf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quf/vocab.txt b/models/quf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9460af971389f2367eadeee1205fd35f0136e730 --- /dev/null +++ b/models/quf/vocab.txt @@ -0,0 +1,35 @@ +a +| +n +i +u +k +y +l +h +p +m +r +q +s +t +c +w +d +' +e +j +b +g +- +o +ñ +f +v +z +0 +x +1 +2 +4 + diff --git a/models/quh/G_100000.pth b/models/quh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0c47c47e1e57aac63dc4840b346076ed52c9530 --- /dev/null +++ b/models/quh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ccf8328c1f01ec4520cd10eb4509ce07fdceafd8ad1f9cdbb4c6dc59af0ab9c +size 145488391 diff --git a/models/quh/config.json b/models/quh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quh/vocab.txt b/models/quh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c761076488811d7cd812ec312467a4ba979cc3a1 --- /dev/null +++ b/models/quh/vocab.txt @@ -0,0 +1,43 @@ +z +á +a +ñ +l +h +t +p +o +g +2 + +d +5 +w +ú +k +é +1 +6 +4 +y +0 +f +n +c +v +' +e +í +i +x +b +_ +j +3 +q +ó +r +u +s +m +7 diff --git a/models/qul/G_100000.pth b/models/qul/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed7818c59146ead5207ba5762c1743c936619760 --- /dev/null +++ b/models/qul/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196c52d1a2c7766dfd7a54e65273bef928a7f3277b8b78f41c5aa225a60808b1 +size 145484629 diff --git a/models/qul/config.json b/models/qul/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qul/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qul/vocab.txt b/models/qul/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ec40aa38649c907d4b2d6ba481472575090f470c --- /dev/null +++ b/models/qul/vocab.txt @@ -0,0 +1,38 @@ +a +| +n +i +u +q +k +h +c +m +y +s +p +t +r +l +w +j +' +o +e +d +ñ +– +b +g +f +í +é +v +ó +ä +z +ú +á +ï +x + diff --git a/models/quw/G_100000.pth b/models/quw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d1c537fa67323a47949426d97de041ae677692e9 --- /dev/null +++ b/models/quw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42be7a76a8e4b4ca6cec655ace7aaf491a9b7c8d8e9533b7a9fec2029af01217 +size 145489239 diff --git a/models/quw/config.json b/models/quw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quw/vocab.txt b/models/quw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bb376ccd885b545481957d15718611675da2c350 --- /dev/null +++ b/models/quw/vocab.txt @@ -0,0 +1,44 @@ +a +| +i +u +n +c +h +s +r +m +p +l +t +g +j +d +y +ñ +o +e +q +b +z +v +ú +f +í +ź +é +ü +á +ó +0 +1 +2 +x +ï +— +' +k +- +ä +ḿ + diff --git a/models/quy/G_100000.pth b/models/quy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..105a3b93cde1f2efda3ad99c297f73e0a14ad5c0 --- /dev/null +++ b/models/quy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24576f01e5ff48581925b6f6b8a083bfc3716eda5b8d782723bb58edb5145e33 +size 145482887 diff --git a/models/quy/config.json b/models/quy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quy/vocab.txt b/models/quy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aa07815af12eb9d107e6a50fcd33850372a5aa66 --- /dev/null +++ b/models/quy/vocab.txt @@ -0,0 +1,36 @@ +a +_ +o +g +s +f +w +z +c +i +u +j +d +r +ó +ñ +q +m +á +e +v +k +p +n +l +b +x +- +í + +é +t +y +h +— +ú diff --git a/models/quz/G_100000.pth b/models/quz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..747d3973340a0cd7a1943b625195159af6fd289d --- /dev/null +++ b/models/quz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58848a9b0ddea03cf6f3a2d63d2373564c08c6c57c4242057aee296b5a92e5a2 +size 145483782 diff --git a/models/quz/config.json b/models/quz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/quz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/quz/vocab.txt b/models/quz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4c92935cde7905d4c5f08c968d5897a8f31e7801 --- /dev/null +++ b/models/quz/vocab.txt @@ -0,0 +1,37 @@ +a +| +n +i +u +q +k +s +h +p +y +c +t +m +r +l +o +w +e +ñ +' +d +j +g +b +- +– +v +f +í +z +é +á +ó +ú +x + diff --git a/models/qvc/G_100000.pth b/models/qvc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0c224e99ebc905e10fe312e36a9f9a77c2b1396e --- /dev/null +++ b/models/qvc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5acfd80d084f538dca2be89bb422c247303937b0f644676c9748204d996ad20e +size 145487483 diff --git a/models/qvc/config.json b/models/qvc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvc/vocab.txt b/models/qvc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..eb8a5365ff1e8f63b336197dbe2a294f0cd0d7a0 --- /dev/null +++ b/models/qvc/vocab.txt @@ -0,0 +1,42 @@ +a +| +i +n +u +h +y +s +q +k +p +m +l +r +c +t +d +w +b +g +j +o +ñ +— +f +e +0 +á +4 +2 +˻ +˼ +í +1 +5 +' +3 +8 +9 +7 +6 + diff --git a/models/qve/G_100000.pth b/models/qve/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a0288f422e2b851bdd3d6bdaa9d3832a799627b --- /dev/null +++ b/models/qve/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9409a1ec5ffcc18e8bedfd6eca39e71012461b1b4f27b56832204c1541bceb +size 145486045 diff --git a/models/qve/config.json b/models/qve/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qve/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qve/vocab.txt b/models/qve/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71deada434a78e4c4a6ac4fee74bf436c3c8e658 --- /dev/null +++ b/models/qve/vocab.txt @@ -0,0 +1,40 @@ +a +| +n +i +u +s +k +q +h +p +c +t +y +r +m +l +o +e +w +d +ñ +j +b +' +g +v +f +á +— +z +í +ó +é +- +ú +x +0 +1 +2 + diff --git a/models/qvh/G_100000.pth b/models/qvh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c50fc03b130c53316d1f508efda84804ea5bd4d --- /dev/null +++ b/models/qvh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ac94a3a305ef7ca36ecd4a644dc61e0578b92f80804e12f52923e1f525209a +size 145486817 diff --git a/models/qvh/config.json b/models/qvh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvh/vocab.txt b/models/qvh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..788f578bc0450d7b8fd9e19248ee632540f27f74 --- /dev/null +++ b/models/qvh/vocab.txt @@ -0,0 +1,41 @@ +ë +f +h +ü +z +o +x +ñ +ä +y +r +d +i +t +ö +k +l +_ +q +u +ï +n +c +v + +é +ó +b +g +j +í +- +ú +m +p +e +á +s +a +' +w diff --git a/models/qvm/G_100000.pth b/models/qvm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5dabaed7b910a3e5a7de1dfbb8fe2fd4c4624fa1 --- /dev/null +++ b/models/qvm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d0b9b644ba225f6ec5eb96958754ce795e6c95265c5375d8f04bb22fcadf247 +size 145486823 diff --git a/models/qvm/config.json b/models/qvm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvm/vocab.txt b/models/qvm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b9e4b6519521ac16922d28b603c2066cd88fda09 --- /dev/null +++ b/models/qvm/vocab.txt @@ -0,0 +1,41 @@ +a +| +n +i +c +u +s +t +g +y +p +r +m +h +o +l +e +ä +w +q +j +d +b +ö +ë +f +v +- +ï +ú +z +á +ü +é +ñ +í +ó +x +' +k + diff --git a/models/qvn/G_100000.pth b/models/qvn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..69cde4a7677374ff31e9668d080c5715a0afea79 --- /dev/null +++ b/models/qvn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf6b6ba629ab65138dd4b7e263663f9657752765d56df3e5b0f9b81c2e205c0 +size 145487595 diff --git a/models/qvn/config.json b/models/qvn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvn/vocab.txt b/models/qvn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7b42f9fc8d70bc41a9630af3d14a1d3f02889cd4 --- /dev/null +++ b/models/qvn/vocab.txt @@ -0,0 +1,42 @@ +a +| +n +c +u +i +r +y +m +g +h +s +p +t +ä +l +o +w +j +e +d +q +b +ü +ï +- +v +z +ë +f +ñ +ö +é +ú +í +' +á +ó +x +— +k + diff --git a/models/qvo/G_100000.pth b/models/qvo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5bc5071dcd017138adf56da1b6b78e1407596ae7 --- /dev/null +++ b/models/qvo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71be1232eee66544f9279fd12e5d8d3328f7fd02c419ce7ef93e39d787bf5c13 +size 145482983 diff --git a/models/qvo/config.json b/models/qvo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvo/vocab.txt b/models/qvo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..01aec67a54d1a5ffee6fcf279bd3ab3d5fb6873d --- /dev/null +++ b/models/qvo/vocab.txt @@ -0,0 +1,36 @@ +a +c +| +i +u +n +h +s +p +t +r +m +l +y +q +o +ñ +j +e +d +— +b +- +g +f +v +z +' +ó +x +í +á +k +ú +é + diff --git a/models/qvs/G_100000.pth b/models/qvs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7c4823332471f5afcd05b342e549cd76184957b --- /dev/null +++ b/models/qvs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c947376c6eab5a0007b755cdfcbf1ae5a3bb35f5493d42cedd9150941ac362df +size 145479141 diff --git a/models/qvs/config.json b/models/qvs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvs/vocab.txt b/models/qvs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3455d89a6679be3ae4f175e22f534be0f7e686ae --- /dev/null +++ b/models/qvs/vocab.txt @@ -0,0 +1,31 @@ +a +| +n +k +i +u +h +s +t +p +y +c +r +l +m +w +e +d +o +j +ñ +b +g +v +— +f +í +á +x +z + diff --git a/models/qvw/G_100000.pth b/models/qvw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bdcaa244f367641caa9bed22a4a14f048adc422 --- /dev/null +++ b/models/qvw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0555d6b84fa341008fb474056c4f441870e20eb03365ccb97f6daeda7213ad1 +size 145490673 diff --git a/models/qvw/config.json b/models/qvw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvw/vocab.txt b/models/qvw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4722a0ac5bcb7e62eb97d13c18a76609ce60d906 --- /dev/null +++ b/models/qvw/vocab.txt @@ -0,0 +1,46 @@ +a +| +c +n +i +u +l +y +h +m +p +t +s +ä +w +ć +q +ś +r +ü +j +ñ +d +ï +' +o +á +e +- +b +g +f +ú +í +v +z +ë +é +˻ +˼ +x +k +ó +— +ö + diff --git a/models/qvz/G_100000.pth b/models/qvz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4c8e29326f315ec46bad141650fc176bbd67c1e0 --- /dev/null +++ b/models/qvz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae661f609cb808bd7eed225f08c353e262ab1d0730e7c0047d2acf9652588607 +size 145482213 diff --git a/models/qvz/config.json b/models/qvz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qvz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qvz/vocab.txt b/models/qvz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0a8b70e9731597976a86d41f973316f8c8c4a4b3 --- /dev/null +++ b/models/qvz/vocab.txt @@ -0,0 +1,35 @@ +a +| +i +u +c +n +h +s +t +r +m +g +l +p +y +d +o +ñ +b +e +q +j +z +á +v +f +- +ü +í +ó +ú +é +x +k + diff --git a/models/qwh/G_100000.pth b/models/qwh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd0756222db467086999db6351a38784cd3836d2 --- /dev/null +++ b/models/qwh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a5c9686549ea467436dfd50b64d2f2ef3f8ed8ab27c4fbb9ce0c24d8449e22 +size 145483779 diff --git a/models/qwh/config.json b/models/qwh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qwh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qwh/vocab.txt b/models/qwh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d90900fdfee34528b5253e5917ed5f0502480549 --- /dev/null +++ b/models/qwh/vocab.txt @@ -0,0 +1,37 @@ +a +| +n +i +u +c +q +s +t +r +e +m +p +l +o +y +h +w +ä +d +j +b +ë +g +ï +— +f +ñ +ö +ü +z +v +˻ +˼ +x +' + diff --git a/models/qxh/G_100000.pth b/models/qxh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..79c9978d1763480ddce40626cd750f78d5d750f0 --- /dev/null +++ b/models/qxh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9034e6cb836a7309a7578950c44d6188ffe3993e399fb41a63b3af2fb6f8fa6 +size 145487607 diff --git a/models/qxh/config.json b/models/qxh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qxh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qxh/vocab.txt b/models/qxh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f16161ac7c8853324920072269a86956dff021f4 --- /dev/null +++ b/models/qxh/vocab.txt @@ -0,0 +1,42 @@ +a +| +n +i +u +c +h +s +r +p +t +m +y +l +j +w +q +ä +g +ć +d +o +e +- +ñ +ú +ç +ü +ï +b +é +í +f +ó +á +z +v +x +k +2 +­ + diff --git a/models/qxl/G_100000.pth b/models/qxl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..49f93a64aebb1c2686c0ffa2b9c14eddceed10e2 --- /dev/null +++ b/models/qxl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89b2d0d1fb8f4ea5b169d3cf9dc07798810aecc2eaa8b50c05fe042a2f4c2361 +size 145484543 diff --git a/models/qxl/config.json b/models/qxl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qxl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qxl/vocab.txt b/models/qxl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..225f05e9b886320f12026cdb6fc75f93747834e1 --- /dev/null +++ b/models/qxl/vocab.txt @@ -0,0 +1,38 @@ +í +– +- +n +w +k +ú +z +' +q +r +d +c +ó +p +é +m +e +_ +h +t +x +o +f +u +s +i +ñ + +v +ü +y +l +b +á +a +j +g diff --git a/models/qxn/G_100000.pth b/models/qxn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26f15c25217e413ab352231f71fd4751b883bcbd --- /dev/null +++ b/models/qxn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13aed48acd7bfd8bf14d723a35c68aa055b01324f76722537d9a7bdd0479b93 +size 145486855 diff --git a/models/qxn/config.json b/models/qxn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qxn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qxn/vocab.txt b/models/qxn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..09d9f614c2b8b3f32eb584745da8b28e58b7c797 --- /dev/null +++ b/models/qxn/vocab.txt @@ -0,0 +1,41 @@ +a +| +n +i +u +c +s +y +t +r +g +m +p +l +o +e +h +ä +w +q +ö +d +j +ñ +b +ï +f +v +ë +ü +z +é +í +ó +á +ú +x +k +' +- + diff --git a/models/qxo/G_100000.pth b/models/qxo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a173c47e6a9ab5d32a4519258766758fef13d48a --- /dev/null +++ b/models/qxo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024aec6804ea8cd48ed814cd6be28dc414fba33f1e614dc0fa5915336219dc9d +size 145485311 diff --git a/models/qxo/config.json b/models/qxo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qxo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qxo/vocab.txt b/models/qxo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..36bfa5a4967867dd8782b60a00dc6dc57828c7aa --- /dev/null +++ b/models/qxo/vocab.txt @@ -0,0 +1,39 @@ +a +| +n +i +u +c +t +y +s +r +m +g +p +l +ä +h +o +e +ö +w +q +d +j +ñ +b +ï +v +ë +f +ü +z +- +0 +' +x +1 +2 +k + diff --git a/models/qxr/G_100000.pth b/models/qxr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..23622c49e5e1e8cd3dae1440294d89d83d9d5f47 --- /dev/null +++ b/models/qxr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5190202887d260e86fece4626104db655f6f8f1026ebdfd90f5fa0391e1cc13 +size 145483011 diff --git a/models/qxr/config.json b/models/qxr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/qxr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/qxr/vocab.txt b/models/qxr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..77c09ef5c8f43ee2476e28e8340918e5b9bac82c --- /dev/null +++ b/models/qxr/vocab.txt @@ -0,0 +1,36 @@ +o +ü +v +f +ó +k + +n +g +ú +x +p +— +m +d +q +- +z +ñ +_ +é +á +i +u +t +a +b +j +y +h +e +s +l +c +í +r diff --git a/models/rah/G_100000.pth b/models/rah/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c21a68afa9f1c5aef7ef6f6765a0762b5500aeee --- /dev/null +++ b/models/rah/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bace8d3b87ed046a4f06418fae6269e4a9a0079411754ae972f1bbae7c71cc4d +size 145486075 diff --git a/models/rah/config.json b/models/rah/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rah/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rah/vocab.txt b/models/rah/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5c0a9108f62c99960d83dd1a36c79277f6a79c26 --- /dev/null +++ b/models/rah/vocab.txt @@ -0,0 +1,40 @@ +ক +চ +ে +া +ব +ই +য +এ +ঃ +ৰ +হ +ৱ +ৗ +প +দ +থ +জ +ম +খ +গ +_ +ল +ং +্ +ছ +ভ +ু +ি +ো +অ +ন +আ +' +ও +ত +ঙ +ফ + +উ +় diff --git a/models/rai/G_100000.pth b/models/rai/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8067a1f0372594cdaf0c879f22af4ca72eb17e9f --- /dev/null +++ b/models/rai/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12af5cfd42f4ae96c4446b9c3f7aa8da1a46e73ca2831b1e885c8ae2129a9db2 +size 145480019 diff --git a/models/rai/config.json b/models/rai/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rai/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rai/vocab.txt b/models/rai/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c8fb3571881d49c8e39351600e92ee9e38ed3158 --- /dev/null +++ b/models/rai/vocab.txt @@ -0,0 +1,32 @@ +a +| +i +n +u +t +m +r +k +p +o +d +g +b +l +w +e +s +1 +2 +0 +7 +4 +6 +5 +3 +' +8 +9 +- +f + diff --git a/models/rap/G_100000.pth b/models/rap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..78cbb74aa00191a02eda38dbcd228b07e22138e0 --- /dev/null +++ b/models/rap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c10ee6d5be1fb88ad40308392f5382e0a83a0ad91249a08756bd14a83c113118 +size 145483743 diff --git a/models/rap/config.json b/models/rap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rap/vocab.txt b/models/rap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5f170e51e1f6e44620796a934b0519b0bd4fb5e0 --- /dev/null +++ b/models/rap/vocab.txt @@ -0,0 +1,37 @@ +l +á +ŋ +d +b +ī +n +j +g +h +ū +— +o +k +ó +s +e +f +ā +- +m +_ +u +p + +a +ē +ō +' +í +v +z +r +c +ꞌ +t +i diff --git a/models/rav/G_100000.pth b/models/rav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a99f5576cfec13e7a2ef07538e47da477f64ada --- /dev/null +++ b/models/rav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b021d291ffacd1390c944eaa1370b15cc1201798ebd2424cb43f1c98ad1da8d +size 145498357 diff --git a/models/rav/config.json b/models/rav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rav/vocab.txt b/models/rav/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..22593b3fcfb367b26467fb73d29189d10aaf6981 --- /dev/null +++ b/models/rav/vocab.txt @@ -0,0 +1,56 @@ +ा +| +ी +म +् +क +न +ू +‍ +ल +त +स +ख +प +ँ +ो +च +व +र +ई +य +ब +ङ +े +आ +छ +ह +ध +द +ऊ +ग +ए +झ +अ +भ +थ +ज +ट +फ +ढ +ड +ः +ठ +़ +घ +ओ +ं +' +- +ृ +उ +ऐ +इ +ण +श + diff --git a/models/raw/G_100000.pth b/models/raw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b3c35905635eb13d0f58b83367a88faa2da4d852 --- /dev/null +++ b/models/raw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbe9f5a917e2db89b5189df711fc5214e3b43d7f2cbca450352ead7e5a854c34 +size 145496811 diff --git a/models/raw/config.json b/models/raw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/raw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/raw/vocab.txt b/models/raw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..afd6f5b3f461aab19ae36f0b9d334e45dd7c46ce --- /dev/null +++ b/models/raw/vocab.txt @@ -0,0 +1,54 @@ +u +è +4 +ú +á +' +y +ā +ù +- +é +c +d +ò +f +à +w +í +z +q +2 +t +n +a +ó +v +j +m + +r +́ +ø +s +_ +i +l +ō +b +̀ +1 +h +ï +ū +̄ +g +ì +p +0 +ē +k +ǿ +6 +e +o diff --git a/models/rej/G_100000.pth b/models/rej/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a302db6aed7318a3af9e331a76cd3d41bf524aff --- /dev/null +++ b/models/rej/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5361bc07df1ae951ad3a199f3f11c57dd7a4e2eedbac7f72fb13afc31f1d533b +size 145480687 diff --git a/models/rej/config.json b/models/rej/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rej/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rej/vocab.txt b/models/rej/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b22700400c5f28ea9f23c4bd0652f5eec8dbea19 --- /dev/null +++ b/models/rej/vocab.txt @@ -0,0 +1,33 @@ +e +d +b +v +_ +6 +z +m +1 +s +g +a +h +w +t +i +0 +p +y +' +- +4 +r + +o +j +u +n +2 +l +k +f +c diff --git a/models/rel/G_100000.pth b/models/rel/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd5526bf28c22cd2052adc8287897d06eb8d7cac --- /dev/null +++ b/models/rel/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1e43fff8d7ae5a0e53fe0e1a5775748c191ad88cebddc31648e9ba07bb610a +size 145477713 diff --git a/models/rel/config.json b/models/rel/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rel/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rel/vocab.txt b/models/rel/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8b77c719f7f7ae6c7dbf4c692b6b09ffd5397a7f --- /dev/null +++ b/models/rel/vocab.txt @@ -0,0 +1,29 @@ +o +e +l +h +f +b +m +' +é +s +ú +a +ó +r +u +i +n +k +t +d +á +g +c +j +w +y +í + +_ diff --git a/models/rgu/G_100000.pth b/models/rgu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d81ad871cfa71ca8164adf1bfe4c308accc6bee --- /dev/null +++ b/models/rgu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36b94d0d71673a12188e30f93c39cb8e31ab42437cc19341d778ec970412fbeb +size 145473873 diff --git a/models/rgu/config.json b/models/rgu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rgu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rgu/vocab.txt b/models/rgu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e21e8b188bad7b39fca0f7fe0c1bb7db6ef2312e --- /dev/null +++ b/models/rgu/vocab.txt @@ -0,0 +1,24 @@ +l +_ +d +a +n +y +c +t +b +h +k +s +r +f +e +g +p +m + +- +' +i +u +o diff --git a/models/rhg/G_100000.pth b/models/rhg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..56db6b88b52b8f8f74e44c8ee831590ee4e92a8b --- /dev/null +++ b/models/rhg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5586224e1a48b857f30fb158c04c0fd0d372405a96ef9e7e951e0d1324633f46 +size 145482195 diff --git a/models/rhg/config.json b/models/rhg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rhg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rhg/vocab.txt b/models/rhg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..67784e17da0a9339dbf514514a6113a0fb7a39ef --- /dev/null +++ b/models/rhg/vocab.txt @@ -0,0 +1,35 @@ +t +n +s +j +e +l +u +í +f + +ñ +z +o +p +v +m +ú +_ +a +y +g +ç +i +r +c +w +h +é +b +á +- +' +d +ó +k diff --git a/models/rif-script_arabic/G_100000.pth b/models/rif-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2b24b2ceff875409f4b7fb3b0b2ffe10cd70e992 --- /dev/null +++ b/models/rif-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b8ea45eb25c498bfbd22ac7f2aa3c163ae49ed9c7d19378cf7d989e90be1796 +size 145488397 diff --git a/models/rif-script_arabic/config.json b/models/rif-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rif-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rif-script_arabic/vocab.txt b/models/rif-script_arabic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4d6bdbd29ef4cd90a6d790f3dc5450c8f41617ab --- /dev/null +++ b/models/rif-script_arabic/vocab.txt @@ -0,0 +1,43 @@ +ڒ +ذ +ط +پ +ث +ت +و +ز +ݣ +_ +س +ظ +ض +ژ +آ +ٱ +ص +- +ى +أ +ا +ك + +ع +ي +ن +ج +ئ +د +خ +م +ب +ؤ +ء +إ +ه +ر +ف +غ +ق +ح +ل +ش diff --git a/models/rif-script_latin/G_100000.pth b/models/rif-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a20af376ca064c59d0a2216035ad775bced9a98 --- /dev/null +++ b/models/rif-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a93ed16bea167ad4d2a5f39a13148c864de528b7419b47b46b1eeeaff1952832 +size 145486820 diff --git a/models/rif-script_latin/config.json b/models/rif-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rif-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rif-script_latin/vocab.txt b/models/rif-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..20e5069d4c536e7d1ac26c83bc464879cfe1f692 --- /dev/null +++ b/models/rif-script_latin/vocab.txt @@ -0,0 +1,41 @@ +| +a +n +i +e +m +r +s +ḏ +u +w +ṯ +y +b +t +ȓ +c +ɣ +l +d +ƹ +q +j +z +k +ḥ +- +f +x +g +ṣ +ṭ +h +ḍ +ǧ +ʼ +ẓ +p +č +' + diff --git a/models/ril/G_100000.pth b/models/ril/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8c0184a37ad6a33d8c9ce211f27cb481b684af80 --- /dev/null +++ b/models/ril/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429e85e78c285e48ec9a508a350376c9be16c89daef3e2702eb925e2e2faf923 +size 145476061 diff --git a/models/ril/config.json b/models/ril/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ril/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ril/vocab.txt b/models/ril/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..35ecd7b5fb13fb8130311550fe18a09d4fc71220 --- /dev/null +++ b/models/ril/vocab.txt @@ -0,0 +1,27 @@ +| +a +z +h +e +n +u +k +i +t +m +w +r +p +o +g +l +s +d +y +c +b +9 +1 +0 +' + diff --git a/models/rim/G_100000.pth b/models/rim/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12769a232b621cc2c70d919ffd42765cf5720f08 --- /dev/null +++ b/models/rim/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa75918e6c774a444ae8cf1e441128c53a96f2f59b0e7a08d3e5466b34f5417c +size 145478387 diff --git a/models/rim/config.json b/models/rim/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rim/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rim/vocab.txt b/models/rim/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f5a54e3cb42d29563e01be8906a3bd79131dc113 --- /dev/null +++ b/models/rim/vocab.txt @@ -0,0 +1,30 @@ +| +a +u +n +e +ë +m +i +g +o +w +t +k +y +h +s +r +v +d +f +j +b +ü +l +' +c +p +z +- + diff --git a/models/rjs/G_100000.pth b/models/rjs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1673127558ff380ccbb785f66302e485bb063d3c --- /dev/null +++ b/models/rjs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d83660942428fd8fad8f12f796c4a64937f859b83b30e7012d234fd7aec0032 +size 145503055 diff --git a/models/rjs/config.json b/models/rjs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rjs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rjs/vocab.txt b/models/rjs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bebd1e42e4e681cd55d3848001e0d7c7f29f53db --- /dev/null +++ b/models/rjs/vocab.txt @@ -0,0 +1,62 @@ +झ +ङ +े +स +म +ग +अ +ृ +_ +ट +फ +च +ल +ऩ +भ +घ +ऱ +३ +त +ि +‍ +4 +थ +1 +ख +ह +उ +' +ओ +य +ध +ए +ड +द +ज +q +ठ +इ +ो +़ +ँ +क +3 +i +m +प +र +ु +ढ +ं +ा +ब +श +छ +६ +आ +l +व +न +् + +2 diff --git a/models/rkt/G_100000.pth b/models/rkt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..79f108dc478a282f9d012f7747894b90049d3b9d --- /dev/null +++ b/models/rkt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb9ce5e480f0b776761bab470e819541d468891e0d52d0b0e3c8d7b1bf8c7366 +size 145503727 diff --git a/models/rkt/config.json b/models/rkt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rkt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rkt/vocab.txt b/models/rkt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4213897afa996dff4df1cfb7465334db3b34a274 --- /dev/null +++ b/models/rkt/vocab.txt @@ -0,0 +1,63 @@ +ধ +এ +ঝ +ৈ +ঢ +ঁ +ত +য +ঠ +অ +ফ +ঙ +ৎ +ং +ল +া +খ +ূ +ঈ +ন +় +ছ +ট +ৌ +শ +আ + +ঐ +ষ +জ +্ +ম +ব +ৃ +ও +ঋ +ঘ +র +ণ +ভ +— +ি +ঞ +_ +ই +ু +ী +চ +ড +উ +- +প +ঔ +' +ো +ে +দ +ঃ +থ +গ +ক +হ +স diff --git a/models/rmc-script_cyrillic/G_100000.pth b/models/rmc-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0348cbc88b69df91804aa1e13b71eadd9cbc4e0 --- /dev/null +++ b/models/rmc-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1b9f57e5543c80a065db2c6cbf91690eb914cb3a8963a8273fc78039eb5fae +size 145483776 diff --git a/models/rmc-script_cyrillic/config.json b/models/rmc-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rmc-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rmc-script_cyrillic/vocab.txt b/models/rmc-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aefe5715064fff290276443625d0cdc78b8f531f --- /dev/null +++ b/models/rmc-script_cyrillic/vocab.txt @@ -0,0 +1,37 @@ +| +а +е +н +о +р +с +л +д +і +к +в +т +п +є +м +у +г +я +й +ґ +б +ч +ш +и +ж +з +— +х +ц +ь +ї +ф +ю +щ +- + diff --git a/models/rmc-script_latin/G_100000.pth b/models/rmc-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8770fdc0af00738b033dcddb707d92482b04b873 --- /dev/null +++ b/models/rmc-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096404e4f08f2943578be5ee26e6f27d0db6daa43a298212bcc161d97879db02 +size 145486843 diff --git a/models/rmc-script_latin/config.json b/models/rmc-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rmc-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rmc-script_latin/vocab.txt b/models/rmc-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ccd5b3ff4fb77bbbafaaf4223ac5e92ac3aebc82 --- /dev/null +++ b/models/rmc-script_latin/vocab.txt @@ -0,0 +1,41 @@ +š +c +_ +b +ď +p +h +` +- +3 +ž +t + +' +ľ +z +j +5 +y +u +o +ó +x +1 +v +f +n +a +i +e +d +ť +s +m +k +ň +2 +r +č +l +g diff --git a/models/rmo/G_100000.pth b/models/rmo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d3b195aa96b5f11c03b6c04e9eedf88c07eb54d --- /dev/null +++ b/models/rmo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eba6f0cc60579b6091ebb0242ac5906e373cecf5c5f7c4a31f2e2bbf1d4906d +size 145476861 diff --git a/models/rmo/config.json b/models/rmo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rmo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rmo/vocab.txt b/models/rmo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2d3731842151ea2a8cac438fe4725818860ac834 --- /dev/null +++ b/models/rmo/vocab.txt @@ -0,0 +1,28 @@ +| +e +a +n +o +l +r +s +i +k +t +h +d +u +p +m +w +j +b +c +g +f +- +v +z +x +y + diff --git a/models/rmy-script_cyrillic/G_100000.pth b/models/rmy-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e930ff2610fbd8f278e3c06627de35881f0cbc22 --- /dev/null +++ b/models/rmy-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a834a54acfded08dc508aa7ca32bc4d9eba3d714d6fdf3897bd238ba779c8b9 +size 145483773 diff --git a/models/rmy-script_cyrillic/config.json b/models/rmy-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rmy-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rmy-script_cyrillic/vocab.txt b/models/rmy-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e3c4015f3630c44cd8b3e206ae14985a13f5207b --- /dev/null +++ b/models/rmy-script_cyrillic/vocab.txt @@ -0,0 +1,37 @@ +ж +с +м +c +у +й +ч +п +- +р +ы +т +щ +я +ф +б +г +з +о +ш +а +— +и +ю +ц +в +ь +ё +д +л +е +н +к +_ +э +х + diff --git a/models/rmy-script_latin/G_100000.pth b/models/rmy-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f19e5da378a8a10b39e0663766a18610cafc3307 --- /dev/null +++ b/models/rmy-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65112aca710836b3dddbf7c174e6d52ab264b4ff15bb76283d51de2798929484 +size 145476951 diff --git a/models/rmy-script_latin/config.json b/models/rmy-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rmy-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rmy-script_latin/vocab.txt b/models/rmy-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7743b4452ca6b25368632481b7ec9cb5563a570f --- /dev/null +++ b/models/rmy-script_latin/vocab.txt @@ -0,0 +1,28 @@ +| +a +e +i +o +l +n +k +t +r +s +d +p +u +m +v +g +h +c +b +z +j +f +— +ñ +' +8 + diff --git a/models/rng/G_100000.pth b/models/rng/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..667da37ab4e5fd869b8b1147dc5f2f9279f69b3a --- /dev/null +++ b/models/rng/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6382d769b5e742d357367bc9e3b6acab0e88afdd0eeb60111e90289d447a91 +size 145489141 diff --git a/models/rng/config.json b/models/rng/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rng/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rng/vocab.txt b/models/rng/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..22d5c28b5386fd9e0d946a1f5006d081d2f9e630 --- /dev/null +++ b/models/rng/vocab.txt @@ -0,0 +1,44 @@ +| +a +i +n +u +k +e +l +b +m +h +w +o +t +y +s +g +ḍ +ŝ +š +f +' +ṛ +ṅ +p +d +ṭ +r +- +ḇ +z +v +c +ž +ṡ +ẹ +ṁ +ṣ +ï +— +ẑ +ḳ +ị + diff --git a/models/rnl/G_100000.pth b/models/rnl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..470f6ad567415cc1cc602241cbdc6da2a6de6d86 --- /dev/null +++ b/models/rnl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a31a1947a18c99907054b858c3280304ef6de8cc7df408481d6ee0b91cf1032 +size 145483017 diff --git a/models/rnl/config.json b/models/rnl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rnl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rnl/vocab.txt b/models/rnl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c60c699e93b1cb9cdcaa94581214db4b6a6fa880 --- /dev/null +++ b/models/rnl/vocab.txt @@ -0,0 +1,36 @@ +0 +_ +u +e + +q +p +ē +d +k +v +ī +f +t +a +j +z +b +o +s +i +1 +2 +n +g +l +- +c +m +ō +h +3 +ā +' +r +ū diff --git a/models/rol/G_100000.pth b/models/rol/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..60b4702417f7f915f44a30160843bebd397c95b9 --- /dev/null +++ b/models/rol/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aef17db966b73947b5c42712bf6ff63fe6bb2e98ae04d5e01cf264f562a0949 +size 145490665 diff --git a/models/rol/config.json b/models/rol/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rol/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rol/vocab.txt b/models/rol/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..28822f5177beb0432cf6d1776556eeb975035426 --- /dev/null +++ b/models/rol/vocab.txt @@ -0,0 +1,46 @@ +c +0 +w +4 +- +h +5 + +k +ñ +z +' +d +o +6 +9 +t +a +p +r +x +u +7 +2 +3 +8 +e +á +à +s +è +l +ì +f +q +1 +_ +j +y +v +ò +g +i +m +n +b diff --git a/models/ron/G_100000.pth b/models/ron/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8e07707d24c097e8ab8475a7ad97a727620acd1 --- /dev/null +++ b/models/ron/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea169ef853021596031728bf4626a1e9ef3f3e4c623edb19efde2fc5b6ba126d +size 145481301 diff --git a/models/ron/config.json b/models/ron/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ron/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ron/vocab.txt b/models/ron/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..3a03c014541dc655bb23d27de1278e8c30cf3760 --- /dev/null +++ b/models/ron/vocab.txt @@ -0,0 +1,34 @@ +c +q +u +b +g +_ +n +– +m +z +i +t +l +ă +f +ș +a +1 +î +p +- +h +s +' +x +e +j +â + +r +v +o +ţ +d diff --git a/models/rop/G_100000.pth b/models/rop/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e4b9a83785ff88493388d8f1b5ba569d2c250160 --- /dev/null +++ b/models/rop/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d974f50a812010d9a0e8202bf67244d95905d6f1b40653ca8d2e09a948dcb590 +size 145472995 diff --git a/models/rop/config.json b/models/rop/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rop/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rop/vocab.txt b/models/rop/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2cb2ee4e2f4d450135b7ee78991b3fd4d691ee61 --- /dev/null +++ b/models/rop/vocab.txt @@ -0,0 +1,23 @@ +b +f +n +r +u +j +h + +_ +d +k +i +y +m +e +a +t +w +o +l +s +g +p diff --git a/models/rro/G_100000.pth b/models/rro/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2e2dad33e26e52f87d39dc0babc317650d2ec634 --- /dev/null +++ b/models/rro/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bffe66927350c0209319c0cd808a42bb30354f10b8bc2bba016f1493b7ffca5 +size 145480563 diff --git a/models/rro/config.json b/models/rro/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rro/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rro/vocab.txt b/models/rro/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..22f6e2807ab36aa0d9afc6da94767045b5033e0d --- /dev/null +++ b/models/rro/vocab.txt @@ -0,0 +1,33 @@ +u +l + +— +t +k +5 +1 +v +4 +h +d +ṯ +e +o +f +a +_ +i +n +g +y +0 +w +9 +3 +' +ṉ +b +s +p +m +r diff --git a/models/rub/G_100000.pth b/models/rub/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd2cebd93d8752b932c7f9d37f4d0899117e9403 --- /dev/null +++ b/models/rub/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff58911bde7c7d37c6c5f7e95b02acbaaa37f27c9920a1d5a1e920eca21734f +size 145483091 diff --git a/models/rub/config.json b/models/rub/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rub/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rub/vocab.txt b/models/rub/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..26c28c66bd2e9783e834c231809c653e2ac9bf15 --- /dev/null +++ b/models/rub/vocab.txt @@ -0,0 +1,36 @@ +i +k +w +_ +u +z +b +o +t +a +ʼ +— +0 +s +e +c +- +̯ +p +4 +f +j +' +̱ + +l +r +g +y +h +d +v +n +ŋ +3 +m diff --git a/models/ruf/G_100000.pth b/models/ruf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a372804f6dfb9bfe40c22583aec243ff75798376 --- /dev/null +++ b/models/ruf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf353c57609e8a42014cc5dfb846f3cfa571e90cd88145225104002980a8700c +size 145481451 diff --git a/models/ruf/config.json b/models/ruf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ruf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ruf/vocab.txt b/models/ruf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..00d3e9d48131cd6a0afa72b824115fe434586af8 --- /dev/null +++ b/models/ruf/vocab.txt @@ -0,0 +1,34 @@ +m +z +d +3 +1 +e +g +u +0 +i +' +h +w +t +s +5 +o +v +f +_ +- +c +r +k +a +l +j +y +b +n +7 + +p +2 diff --git a/models/rug/G_100000.pth b/models/rug/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..79b523712f7b79a932128aba00e175c283b4dcdd --- /dev/null +++ b/models/rug/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07a3c38eeefae8fb12ca5cb0dd322c751bd833ca425890dd63e8602b3db873d9 +size 145474555 diff --git a/models/rug/config.json b/models/rug/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rug/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rug/vocab.txt b/models/rug/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..493763157516b1fbc6cf6b76a4a60a70e09fe5dc --- /dev/null +++ b/models/rug/vocab.txt @@ -0,0 +1,25 @@ +| +a +i +e +s +u +o +n +t +k +r +m +p +l +g +v +d +b +h +ṉ +z +q +' +- + diff --git a/models/run/G_100000.pth b/models/run/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1021713d6531893c4b858072dc98bc479618545c --- /dev/null +++ b/models/run/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:218eb0de1e5256e5799338d81ac256663268b0a3989d62c8e17d83a617709847 +size 145477719 diff --git a/models/run/config.json b/models/run/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/run/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/run/vocab.txt b/models/run/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0970310adeac6d9b457a4171ab9d43b79949d5cd --- /dev/null +++ b/models/run/vocab.txt @@ -0,0 +1,29 @@ +| +a +i +u +n +e +r +o +b +m +k +w +y +g +t +s +h +z +' +d +v +c +j +p +f +l +- +— + diff --git a/models/rus/G_100000.pth b/models/rus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0197f497014523e266c3d5260f7de6da154555f4 --- /dev/null +++ b/models/rus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5ccb31a5d591c8902b791f42441115d2b8241eb2b20b1748f98a75f959f5d0 +size 145489235 diff --git a/models/rus/config.json b/models/rus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/rus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/rus/vocab.txt b/models/rus/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..8d0df6edfd962d06c43e3690cf72847fc4ae2e94 --- /dev/null +++ b/models/rus/vocab.txt @@ -0,0 +1,44 @@ +ч + +д +ь +я +й +с +у +а +о +2 +0 +ъ +ы +н +л +ж +х +ш +з +c +_ +п +m +ю +э +р +4 +щ +o +– +ф +q +и +ц +к +1 +е +б +- +т +в +г +м diff --git a/models/sab/G_100000.pth b/models/sab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e68f79f54574c45f780e41f2c89bd43e313bfe83 --- /dev/null +++ b/models/sab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be85c40f905ee8a5a2ff0abb3c7283ec0e26bd7444da4d902917df30abf8b23 +size 145483741 diff --git a/models/sab/config.json b/models/sab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sab/vocab.txt b/models/sab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71a6e500350558a26c531d45674ffd7ac0de293f --- /dev/null +++ b/models/sab/vocab.txt @@ -0,0 +1,37 @@ +| +a +e +i +g +l +u +n +b +t +k +o +r +c +h +d +m +w +s +j +ñ +p +é +— +í +f +v +á +- +ó +z +q +y +ú +x +' + diff --git a/models/sag/G_100000.pth b/models/sag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d93066f4e0aedc766a11f9f90efa8ecf98994369 --- /dev/null +++ b/models/sag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c46aa46b1405f28bc4b64e9a7445416b144ca02b4fb52632a65319a694524592 +size 145486076 diff --git a/models/sag/config.json b/models/sag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sag/vocab.txt b/models/sag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6cb458fb00eae8c1236b12f9eee45222443a87e2 --- /dev/null +++ b/models/sag/vocab.txt @@ -0,0 +1,40 @@ +| +a +i +e +n +o +t +l +g +s +k +b +m +y +u +z +p +r +d +ë +w +f +h +â +- +j +é +ö +c +v +ê +ï +ô +– +' +q +x +è +î + diff --git a/models/sah/G_100000.pth b/models/sah/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c2f0ea2f471283699155073324a99d25479cfdcf --- /dev/null +++ b/models/sah/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c9be9089cda47712a3d06f4e0954b29ae4944fcde3f09ca7258d94c037f1f6 +size 145489135 diff --git a/models/sah/config.json b/models/sah/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sah/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sah/vocab.txt b/models/sah/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..000fa4d9a5baf069f36d35be58d2291d0ccd85d8 --- /dev/null +++ b/models/sah/vocab.txt @@ -0,0 +1,44 @@ +ю +у +й +h +р +в +ь +һ +т +– +з +с +ы +э +п +ф +н +б +я +ж +х +c +к +о +_ +a +а +ҥ +ү + +ц +ч +и +i +м +д +ҕ +е +г +3 +ө +л +- +ш diff --git a/models/saj/G_100000.pth b/models/saj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..729e9565539c76b123f9da9defb8d33da2dc522f --- /dev/null +++ b/models/saj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4419fe50d59ea2606161a802ff829473e99ea34f83c0faad6ac498d28858d070 +size 145478369 diff --git a/models/saj/config.json b/models/saj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/saj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/saj/vocab.txt b/models/saj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..39c76409a8257f1fcc43c3286c0dc4d1293b1228 --- /dev/null +++ b/models/saj/vocab.txt @@ -0,0 +1,30 @@ +ḇ +d +a +c +s +z +e +l +- +' +b +_ +k + +o +j +p +n +ḏ +h +m +i +f +r +y +̱ +g +u +t +w diff --git a/models/saq/G_100000.pth b/models/saq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5607cd53aa2cace4e79534d7efe39e7e5f3c487a --- /dev/null +++ b/models/saq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e18becce8fc950da2badf390008c9d64a637e551bc4c3f4e9c80c0f1f5fb2e09 +size 145483093 diff --git a/models/saq/config.json b/models/saq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/saq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/saq/vocab.txt b/models/saq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..696a3f4631cb69dac50f6999d82f150ed3b0f9f0 --- /dev/null +++ b/models/saq/vocab.txt @@ -0,0 +1,36 @@ +7 +u +o +k +8 +n +2 +- +h +a +m +5 +g +r +t +b +i +s +9 +l +6 +p +e +1 +c +y +' +d +_ + +0 +j +3 +꞉ +4 +w diff --git a/models/sas/G_100000.pth b/models/sas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..acf98eca40525e0df33253cd9e4ea8bc978c2d0a --- /dev/null +++ b/models/sas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb473529a3bfbdd8855369a12f919fe88c09630905abc8cb4550c8026be4cc7e +size 145478491 diff --git a/models/sas/config.json b/models/sas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sas/vocab.txt b/models/sas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9a8a45e86dbc1f89bf974b2cc1af2bfe586d46ca --- /dev/null +++ b/models/sas/vocab.txt @@ -0,0 +1,30 @@ +| +a +e +n +i +s +d +t +g +q +l +u +m +k +p +r +ẽ +b +h +o +j +- +y +w +c +' +z +f +é + diff --git a/models/sba/G_100000.pth b/models/sba/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0a9b5813813c3f442e6558b993a467da9f173a6 --- /dev/null +++ b/models/sba/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b99150bc3f0d8f4ececd5de713aa0a4c36d068b037159b68a20c764aed5c9d54 +size 145493731 diff --git a/models/sba/config.json b/models/sba/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sba/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sba/vocab.txt b/models/sba/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6c417b0f6935b0ca653b058853cf26de9acec6e4 --- /dev/null +++ b/models/sba/vocab.txt @@ -0,0 +1,50 @@ +| +a +e +ə +g +n +d +l +m +r +o +i +j +k +t +ɔ +é +s +' +́ +b +u +w +ḛ +y +ŋ +ɓ +- +̰ +í +á +p +ḭ +è +ɛ +ó +à +ò +ɗ +h +ú +ṵ +̀ +ǝ +2 +1 +v +4 +3 + diff --git a/models/sbd/G_100000.pth b/models/sbd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d9fd153b30d39004117eead6bcdf26210c53323 --- /dev/null +++ b/models/sbd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a45d95a91ae0c3137ce87cc97556417ad88e0a8a83d9cea88e5774170059469 +size 145494527 diff --git a/models/sbd/config.json b/models/sbd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sbd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sbd/vocab.txt b/models/sbd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71f6c2924f22c80bf86c3722ecac9921c1372ead --- /dev/null +++ b/models/sbd/vocab.txt @@ -0,0 +1,51 @@ +| +a +n +ɛ +i +l +ɔ +k +o +b +m +d +w +e +g +s +r +u +á +t +y +́ +ǹ +p +z +ã +ə +à +̃ +‐ +̀ +f +í +ĩ +ǎ +ì +ũ +h +ò +ǐ +ù +ú +— +ń +v +è +é +ó +̌ +- + diff --git a/models/sbl/G_100000.pth b/models/sbl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b2bf8d046769081ec5d4d3fc9e09812f373a667 --- /dev/null +++ b/models/sbl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3635ea5369bb033eca0364594b2810da0d8bb751fc3b9287e91758472bebe65 +size 145477593 diff --git a/models/sbl/config.json b/models/sbl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sbl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sbl/vocab.txt b/models/sbl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5bbb58a74d322d57a061456f958fec4b47bccbbd --- /dev/null +++ b/models/sbl/vocab.txt @@ -0,0 +1,29 @@ +a +| +n +i +o +y +m +h +t +k +g +l +p +- +b +w +s +r +e +d +à +ò +è +̇ +̀ +' +ì +— + diff --git a/models/sbp/G_100000.pth b/models/sbp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfae5a94e4c799572084edb98fbe14194356e7cd --- /dev/null +++ b/models/sbp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11738fb74ca98de60753322362a474557f992c375be84e1b43cc81bc816bf49b +size 145476105 diff --git a/models/sbp/config.json b/models/sbp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sbp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sbp/vocab.txt b/models/sbp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4f46419ec25925467826d18f6988590c12c5698 --- /dev/null +++ b/models/sbp/vocab.txt @@ -0,0 +1,27 @@ +_ +l +y +v + +j +' +g +h +ʼ +n +s +d +p +f +b +k +e +- +t +á +i +u +o +m +a +w diff --git a/models/sch/G_100000.pth b/models/sch/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d227353b523ecc1e6762144b270579bd734288c --- /dev/null +++ b/models/sch/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4ed897d177368fc94ac64aabecf9fe9a5a12568d26bcdbc8268102e347b652 +size 145486167 diff --git a/models/sch/config.json b/models/sch/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sch/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sch/vocab.txt b/models/sch/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bf8c1ddef7c6843be713b778e7fac4c3dec8fcec --- /dev/null +++ b/models/sch/vocab.txt @@ -0,0 +1,40 @@ +f +x +3 +g +y +j +û +s +e +4 +â +d + +a +ô +- +l +c +ê +z +p +_ +u +h +w +k +n +' +q +b +1 +v +0 +î +— +m +r +i +t +o diff --git a/models/sck/G_100000.pth b/models/sck/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9ce9a337785b1afb57ace66bb911ac539e464010 --- /dev/null +++ b/models/sck/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17b1251271b1950f549f7826927bda7f05011118692bbeb9bc6212460beacf86 +size 145501415 diff --git a/models/sck/config.json b/models/sck/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sck/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sck/vocab.txt b/models/sck/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..06f7e56af5da57f6b31cb7e5fd834d41f650b7af --- /dev/null +++ b/models/sck/vocab.txt @@ -0,0 +1,60 @@ +ढ +ः +ऊ +च +त +ी +ु +भ +ं +प +द +आ +ृ +ड +न +य +श +ट +घ +उ +_ + +म +े +इ +फ +ै +थ +स +व +ठ +अ +् +ल +ण +ऐ +ह +ख +ज +छ +‍ +ा +ग +- +ष +ध +ि +ौ +़ +' +ब +ू +क +झ +ो +र +ओ +ञ +ई +ए diff --git a/models/sda/G_100000.pth b/models/sda/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6e56bb94f6ab3dd845b956f7d5e33c73e30c04b7 --- /dev/null +++ b/models/sda/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59256eb6e52cd890b79a3d35ac46eafa5164c99993663da3c1d988a56b845f7 +size 145477637 diff --git a/models/sda/config.json b/models/sda/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sda/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sda/vocab.txt b/models/sda/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2c66ef13db33310968919fa37e16785e7615f1c4 --- /dev/null +++ b/models/sda/vocab.txt @@ -0,0 +1,29 @@ +i +u +' +k +t +p +d +– +z +w +l + +r +a +g +h +- +f +m +ë +y +c +e +s +o +b +n +j +_ diff --git a/models/sea/G_100000.pth b/models/sea/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7bb87fed2e660183eba1dfb8f5028ad863b104b0 --- /dev/null +++ b/models/sea/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887edbb038537f15fa1f659088248da130db24f4559746c7d137e7949f3c3365 +size 145485297 diff --git a/models/sea/config.json b/models/sea/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sea/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sea/vocab.txt b/models/sea/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..abc684f04258c77e7ed4336f17ffe27e97d3d169 --- /dev/null +++ b/models/sea/vocab.txt @@ -0,0 +1,39 @@ +r +y + +g +2 +m +z +1 +4 +s +q +c +5 +- +h +n +k +' +7 +w +u +o +b +j +i +6 +ñ +d +p +ò +0 +è +f +é +_ +l +e +t +a diff --git a/models/seh/G_100000.pth b/models/seh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3a5158a2ffce16c67a6b9c86ef867b34e18e3c7e --- /dev/null +++ b/models/seh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8124409f6e6def7a167fa9e6753cb85c2de3742e6a3b73e65e14c92b7bf77649 +size 145477607 diff --git a/models/seh/config.json b/models/seh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/seh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/seh/vocab.txt b/models/seh/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..09dff124944c79c60be8a2c2b7d5ff750b7ebbdb --- /dev/null +++ b/models/seh/vocab.txt @@ -0,0 +1,29 @@ +a +| +n +i +u +e +k +m +o +w +t +p +d +b +h +y +l +z +s +g +r +c +f +' +j +v +x +- + diff --git a/models/ses/G_100000.pth b/models/ses/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e9fc72f6462cc7987c55eb64a6d97f5d0083e6f --- /dev/null +++ b/models/ses/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d02e0dd7d4a9c5fbb11fd85943c273e9d3c4c41c81c1da7316b95846f9ce1e6 +size 145482977 diff --git a/models/ses/config.json b/models/ses/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ses/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ses/vocab.txt b/models/ses/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d4e72fb8b5b86ce6b25213b64caf3a3e857223ec --- /dev/null +++ b/models/ses/vocab.txt @@ -0,0 +1,36 @@ +e +a +c +t +ɲ +g +ž +r +y +u +k +i +ã +š +_ +õ +ũ +d +n +s +p +ẽ +z +f +o +h +ŋ +ʼ +j +- + +w +l +m +b +– diff --git a/models/sey/G_100000.pth b/models/sey/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a1edd497bd1b5e536d52b8cea9b2d958009fb28 --- /dev/null +++ b/models/sey/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a05240cac80636449956d10f1b8e9d9b7c3ed48c50667eaea40dd9f0189ac6 +size 145483860 diff --git a/models/sey/config.json b/models/sey/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/sey/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sey/vocab.txt b/models/sey/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1c7e687f9ed9d49249eb30380dbcfb47d5d988a1 --- /dev/null +++ b/models/sey/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +e +' +ë +u +o +j +p +c +̱ +s +y +r +n +h +t +m +q +ñ +d +l +b +ú +í +é +g +f +z +ó +á +v +̲ +x +k + diff --git a/models/sgb/G_100000.pth b/models/sgb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..67faff436478ecc3943edbce0853f95d59791536 --- /dev/null +++ b/models/sgb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28255568cb9c4b4a683008daf7cd2e032ca1f34a1caee60ae02607068932bef1 +size 145487613 diff --git a/models/sgb/config.json b/models/sgb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sgb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sgb/vocab.txt b/models/sgb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5dd6287e188f3d118e0a66d429b188376daf5229 --- /dev/null +++ b/models/sgb/vocab.txt @@ -0,0 +1,42 @@ +a +| +n +i +y +o +h +m +l +k +t +p +g +ê +b +u +r +s +w +d +e +- +j +c +á +í +f +z +v +q +ú +0 +x +1 +2 +ó +ñ +5 +6 +4 +3 + diff --git a/models/sgj/G_100000.pth b/models/sgj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..08751bda93c5c3998be4b8c0f6fdd0ff9ea5ad4c --- /dev/null +++ b/models/sgj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4954bcd2fd7be42d8709c23ee6b438bf7e0c9c6cfcdfc76f3a4a5f9c661003ba +size 145493744 diff --git a/models/sgj/config.json b/models/sgj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sgj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sgj/vocab.txt b/models/sgj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9020d7291ad47252eb20fccd6fd67662ec778aa6 --- /dev/null +++ b/models/sgj/vocab.txt @@ -0,0 +1,50 @@ +े +् +' +ट +त +ओ +उ +ो + +ि +ं +ख +थ +ए +ल +ढ +च +र +ब +म +ध +ज +न +ई +ी +ग +़ +ऊ +भ +_ +द +ू +ु +प +- +ड +स +घ +व +य +आ +ह +इ +अ +ा +क +झ +ठ +छ +फ diff --git a/models/sgw/G_100000.pth b/models/sgw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e175b7792a1b72ca8826cc4bbd8345bba328cc6 --- /dev/null +++ b/models/sgw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:954f2301c6eabaf92f6f8c74be7e2f7873468ba2894aec662abbaf4ebb51fa70 +size 145477619 diff --git a/models/sgw/config.json b/models/sgw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..993d1dedb1d0c8e820b98f9e2f019ff166327038 --- /dev/null +++ b/models/sgw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.uroman", + "validation_files": "dev.uroman", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sgw/vocab.txt b/models/sgw/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf59b28ae2f095ea42d626e977a7e198fb23c678 --- /dev/null +++ b/models/sgw/vocab.txt @@ -0,0 +1,29 @@ + +' +2 +` +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +w +x +y +z diff --git a/models/shi/G_100000.pth b/models/shi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a524d95881cd1833ff0b417abb1de5184982b2c2 --- /dev/null +++ b/models/shi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:341a8cec434bebb628194b18ae98c7dda90b6b935dd7d6d2de446bb130596647 +size 145482994 diff --git a/models/shi/config.json b/models/shi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/shi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/shi/vocab.txt b/models/shi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0520e2419a135c2318d4aedcd21842fc4a0e9d14 --- /dev/null +++ b/models/shi/vocab.txt @@ -0,0 +1,36 @@ +h +j +b +ε +ṣ +ṭ +u +a +š +‐ +ḥ +r +t +f +_ +d +z +q +ḫ +â +s +i +l +k +g +ġ +m +1 +ÿ +n +ḍ +ẓ +w +e + +y diff --git a/models/shk/G_100000.pth b/models/shk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c34c7e2ed23b68a61768b6d07e8fe8be3f847dab --- /dev/null +++ b/models/shk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77af126a8cae7b81039cb91339dbad7c380aab4a3b0e2ccb815df476b3f26075 +size 145491441 diff --git a/models/shk/config.json b/models/shk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/shk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/shk/vocab.txt b/models/shk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..23d7295ed695ab47070f205f933d6ca6236082cb --- /dev/null +++ b/models/shk/vocab.txt @@ -0,0 +1,47 @@ +| +ɑ +n +i +g +k +e +y +ø +w +d +m +ë +b +l +h +j +r +ï +u +̈ +ö +t +- +o +p +́ +c +ì +ó +s +í +̀ +a +á +è +ù +' +– +— +é +ú +ä +8 +2 +1 + diff --git a/models/shn/G_100000.pth b/models/shn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e60c72ee3cde3c1689a16f8ebd60745ef7b0a7c2 --- /dev/null +++ b/models/shn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066125060826e67951156c46afac19f16a435d2b7884a88fda1356bec9fe9679 +size 145489117 diff --git a/models/shn/config.json b/models/shn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/shn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/shn/vocab.txt b/models/shn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cecbbec780d8af982e10ff7ab75affa3cfd10f32 --- /dev/null +++ b/models/shn/vocab.txt @@ -0,0 +1,44 @@ +| +် +ၼ +ႈ +း +ဝ +ႃ +ၵ +ႇ +တ +မ +င +ူ +ဢ +ႉ +ိ +လ +ၢ +ၸ +ေ +ပ +သ +ု +ႁ +ႆ +ၶ +ႂ +ွ +ဵ +ယ +ႄ +ီ +ၽ +ႅ +- +ြ +ထ +ၺ +ျ +ရ +' +ႀ +ၿ + diff --git a/models/sho/G_100000.pth b/models/sho/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..371dc780f765286588b247aab0f01fb1186cdc5e --- /dev/null +++ b/models/sho/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fc7ea65881a8117ab75f2195c42ee91688e896d387e404a70ea269224a32de2 +size 145492181 diff --git a/models/sho/config.json b/models/sho/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sho/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sho/vocab.txt b/models/sho/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..db31fdb83df746103514089e97496a15ebe66a4b --- /dev/null +++ b/models/sho/vocab.txt @@ -0,0 +1,48 @@ +| +a +i +ɛ +k +b +n +e +o +d +t +m +g +à +ã +u +l +h +ɔ +ì +z +s +w +̃ +y +̀ +j +á +' +ũ +p +ĩ +c +ò +è +́ +ù +ǹ +f +r +é +ń +ó +ḿ +v +í +ú + diff --git a/models/shp/G_100000.pth b/models/shp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6b1c4d194a82d05fc5f5657cb648c84494ebeadc --- /dev/null +++ b/models/shp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38cd22bbbbc18517667bcb52464167bee17b48c9671da18e3363963ae9555af8 +size 145483779 diff --git a/models/shp/config.json b/models/shp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/shp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/shp/vocab.txt b/models/shp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c3a01ca3c049e25405cdf73f0b39122f1bf6ba70 --- /dev/null +++ b/models/shp/vocab.txt @@ -0,0 +1,37 @@ +a +| +i +n +o +k +e +j +t +b +s +r +m +x +h +y +w +á +p +c +í +d +é +u +l +— +ó +g +ú +f +z +' +v +q +ñ +- + diff --git a/models/sid/G_100000.pth b/models/sid/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6596738772b69c56a4ab3038e71d77baefbf67e2 --- /dev/null +++ b/models/sid/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86ccb78c177693faa3f2c831b32befe16011d2ac94bed7cee536302fd3d4d148 +size 145484525 diff --git a/models/sid/config.json b/models/sid/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sid/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sid/vocab.txt b/models/sid/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..de9c57a99755e845e7ddcf7b4079b699e91d1bc9 --- /dev/null +++ b/models/sid/vocab.txt @@ -0,0 +1,38 @@ +a +| +n +i +o +e +s +h +u +t +r +m +k +l +d +y +g +b +' +c +w +q +f +x +j +p +z +0 +1 +2 +6 +4 +5 +- +3 +7 +8 + diff --git a/models/sig/G_100000.pth b/models/sig/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..efde79f3972d2f26f8417d6aa296edd1587f95bc --- /dev/null +++ b/models/sig/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f07fe243f00fce411dcea97f2ece0e86bf9bad298ada6b9479ea0eaa58e261d +size 145480807 diff --git a/models/sig/config.json b/models/sig/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sig/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sig/vocab.txt b/models/sig/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ae1aa88aea860ff435cdafee58a60c54655b3a93 --- /dev/null +++ b/models/sig/vocab.txt @@ -0,0 +1,33 @@ +| +a +ɩ +y +ŋ +b +ʋ +l +ɛ +e +k +n +s +i +w +m +t +o +d +g +r +h +u +ɔ +p +f +á +v +- +' +z +̃ + diff --git a/models/sil/G_100000.pth b/models/sil/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..99b0c837383eb7ad7df46248e898d81c8911ebd2 --- /dev/null +++ b/models/sil/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd27d05a1680bfdca5c1a2133a6af8dec0ccaae5b2afabae1bdf0453915a494 +size 145483089 diff --git a/models/sil/config.json b/models/sil/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sil/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sil/vocab.txt b/models/sil/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..99c1e2cb74c907cccc2a6c6be10cc6e092933fd2 --- /dev/null +++ b/models/sil/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +ŋ +u +l +ɛ +n +b +s +m +d +t +e +k +w +r +o +- +ↄ +h +p +y +j +g +c +f +í +z +á +v +' +́ +ú +ɔ + diff --git a/models/sja/G_100000.pth b/models/sja/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7a2e05b60e692c8d4fd205f3dd56ca80a5053a7a --- /dev/null +++ b/models/sja/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3293341ff5d47681a9f87eecd38685948dd9c6e0994730f7d37ba79ae93d879e +size 145490673 diff --git a/models/sja/config.json b/models/sja/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sja/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sja/vocab.txt b/models/sja/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..edbcef7b89d4fe68e4d0166e314cc75d9ac74b35 --- /dev/null +++ b/models/sja/vocab.txt @@ -0,0 +1,46 @@ +a +| +i +e +p +r +' +ã +m +d +t +k +ɨ +o +j +c +u +h +n +b +w +s +õ +– +g +ĩ +l +á +̃ +y +ũ +ẽ +ë +́ +— +ú +é +v +í +f +ó +z +q +ñ +x + diff --git a/models/sjm/G_100000.pth b/models/sjm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4da1346afe9bd3fddf22242c2e13c1208c4e5f39 --- /dev/null +++ b/models/sjm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a437441d274af5052ab9ea04af85eda4ecf08235b3455eec15a1c2e99b71d5 +size 145479135 diff --git a/models/sjm/config.json b/models/sjm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sjm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sjm/vocab.txt b/models/sjm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..884e4055d73e9af98d5f9b75f6a85ec427042c13 --- /dev/null +++ b/models/sjm/vocab.txt @@ -0,0 +1,31 @@ +a +| +n +i +u +m +s +k +t +l +b +y +g +o +ꞌ +p +d +h +e +w +j +r +- +ā +ō +ū +ī +ē +z +c + diff --git a/models/sld/G_100000.pth b/models/sld/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..91fd1e95502f62d29c967a58ed96542d28006322 --- /dev/null +++ b/models/sld/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008fc7b1c31a7f3c2b95db524148bf516564afcaef92321d0386ace76a155244 +size 145490541 diff --git a/models/sld/config.json b/models/sld/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sld/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sld/vocab.txt b/models/sld/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..71135bce0e266755c1ec4972916fba5a19e57500 --- /dev/null +++ b/models/sld/vocab.txt @@ -0,0 +1,46 @@ +| +́ +ɩ +ɛ +a +á +n +ʋ +r +w +s +l +ŋ +m +ɔ +k +b +i +t +í +u +h +e +é +z +o +p +d +y +ó +ú +ɓ +c +f +j +̃ +g +v +' +ẽ +ĩ +ã +õ +q +ṹ + diff --git a/models/slu/G_100000.pth b/models/slu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe3cbefa66030e94dad691038a540ae71ed4c889 --- /dev/null +++ b/models/slu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597194710e23081c97062f5a56ebc1771dc8fead33ae1418d402eb578787904f +size 145476086 diff --git a/models/slu/config.json b/models/slu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/slu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/slu/vocab.txt b/models/slu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9b427ba00da97b288db7bcbdc0a2f38aa6596bfa --- /dev/null +++ b/models/slu/vocab.txt @@ -0,0 +1,27 @@ +| +a +e +k +i +y +s +m +r +n +t +o +u +l +d +h +w +b +g +f +j +- +p +z +c +' + diff --git a/models/sml/G_100000.pth b/models/sml/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f687aea7e286460572ab20a912fe0352ba4e4679 --- /dev/null +++ b/models/sml/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a72b1977f1391e2009e319496660e0f3dc375d107b48ae3f5f61d188bcecd33 +size 145478355 diff --git a/models/sml/config.json b/models/sml/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sml/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sml/vocab.txt b/models/sml/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3457253de9fffab5577e9e3444212767a84ea350 --- /dev/null +++ b/models/sml/vocab.txt @@ -0,0 +1,30 @@ +a +| +n +i +m +u +' +s +k +g +t +l +b +y +p +h +d +o +e +w +ā +r +- +j +ō +ū +ē +ī +­ + diff --git a/models/smo/G_100000.pth b/models/smo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1003731f097b68476d6a443ee0e4617358553fe4 --- /dev/null +++ b/models/smo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a859aa0410c19b8c50b8e155fe5f177f1ca3c759d7860a677992bbde02b946 +size 145475327 diff --git a/models/smo/config.json b/models/smo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/smo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/smo/vocab.txt b/models/smo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9378ffcbcbfa6bfb21274020afd9240368dabf79 --- /dev/null +++ b/models/smo/vocab.txt @@ -0,0 +1,26 @@ +o +g +ē +ō +v +u +m +- +p +r +i +f +l +e +t +ā +h +a +_ +n +k +ī +ū + +' +s diff --git a/models/sna/G_100000.pth b/models/sna/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b914a29c29908bb7038859e224aa967eb3415f84 --- /dev/null +++ b/models/sna/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c6f9d9467006a664551e55787e752ffe49a0ab26f4070598469a750238dba68 +size 145480677 diff --git a/models/sna/config.json b/models/sna/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sna/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sna/vocab.txt b/models/sna/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cf28408c83a2e187cab8297fe1d1f04224e004b0 --- /dev/null +++ b/models/sna/vocab.txt @@ -0,0 +1,33 @@ +w +a +m +t +b + +v +y +c +n +z +g +1 +o +r +_ +q +' +j +ʼ +l +i +4 +k +e +d +u +- +p +s +f +h +2 diff --git a/models/sne/G_100000.pth b/models/sne/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c628730a23ae7d50e30a7d90d72992c1df647a48 --- /dev/null +++ b/models/sne/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40bd11dd5d3a91b1849e9ff96ebe1e0a4197395fe15ebb52ab66cc86f07c7a8 +size 145486031 diff --git a/models/sne/config.json b/models/sne/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sne/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sne/vocab.txt b/models/sne/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c1a059b296ae429beda2c72b0e18cf5122c76420 --- /dev/null +++ b/models/sne/vocab.txt @@ -0,0 +1,40 @@ +| +a +n +o +i +u +d +ꞌ +e +h +g +s +t +k +r +m +p +b +y +j +- +l +w +f +— +z +v +c +0 +x +2 +1 +' +6 +4 +3 +7 +q +5 + diff --git a/models/snn/G_100000.pth b/models/snn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c13de348547d1412d381923e68204ae55ddac288 --- /dev/null +++ b/models/snn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e09b43a169441af9b5663cbafbadae3c446705db49c7314a1863b58cff2cffe +size 145486825 diff --git a/models/snn/config.json b/models/snn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/snn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/snn/vocab.txt b/models/snn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c6bec43422056435cca48be1ab3417bbf022f0c6 --- /dev/null +++ b/models/snn/vocab.txt @@ -0,0 +1,41 @@ +2 +0 +y +q +b +4 +ü + +k +p +o +v +l +é +c +m +á +d +i +f +a +ú +1 +z +j +— +s +n +u +g +í +ë +ñ +t +h +e +x +_ +ó +r +' diff --git a/models/snp/G_100000.pth b/models/snp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f54adde71a49471a49e1c3dd1c5e016f8d94b61 --- /dev/null +++ b/models/snp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0788cbc70971f461a5e28f1dbbe51450676188729bc95e13f30000c48e1d6ad3 +size 145473649 diff --git a/models/snp/config.json b/models/snp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/snp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/snp/vocab.txt b/models/snp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9543e4f996ef2b919213bb133c034b1ce0669ae7 --- /dev/null +++ b/models/snp/vocab.txt @@ -0,0 +1,24 @@ +b +a +y +i +l +p +á +n +' +t +w +- +_ +k +f +u +o +m + +s +h +e +g +d diff --git a/models/snw/G_100000.pth b/models/snw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..69d69a5f3731d563b401cbcd367e8e2b06f66229 --- /dev/null +++ b/models/snw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a7130afffe2aa693214a0bd4b337d46c73f05b2d9baf5647f8029a2cff9c45 +size 145483779 diff --git a/models/snw/config.json b/models/snw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/snw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/snw/vocab.txt b/models/snw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1a08097489ca3a6cd405862e6978fd9a8d03d718 --- /dev/null +++ b/models/snw/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +n +k +o +e +l +ɛ +ɔ +b +u +t +s +y +m +w +f +d +p +ũ +r +ĩ +- +h +g +á +í +ã +v +é +̃ +' +à +ú +́ + diff --git a/models/som/G_100000.pth b/models/som/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d125e3a33fb2a9819352bde0f6679c43a198687 --- /dev/null +++ b/models/som/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c8fad5fecebdbe11bbe487ab8d309958183d9812704ece83a92a16e04f1224b +size 145476087 diff --git a/models/som/config.json b/models/som/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/som/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/som/vocab.txt b/models/som/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1cf083f978b053b568c8a6ff84c42de4da7b1d6d --- /dev/null +++ b/models/som/vocab.txt @@ -0,0 +1,27 @@ +u +b + +e +c +' +f +w +l +j +d +n +r +i +g +q +m +t +_ +k +y +o +x +s +- +h +a diff --git a/models/soy/G_100000.pth b/models/soy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..68edd038825d01d4f0e40bd538a7a58eaaba4ba3 --- /dev/null +++ b/models/soy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d60c92dad35fc22e36befb88688decbbc205da29419f4d85304dea95cc73c336 +size 145490675 diff --git a/models/soy/config.json b/models/soy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/soy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/soy/vocab.txt b/models/soy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4328dd448566841ce34fd3e6c3963e5e9d026630 --- /dev/null +++ b/models/soy/vocab.txt @@ -0,0 +1,46 @@ +ɔ +ú +ũ +y +– +t +ɛ +r +ó +é +_ + +ë +w +l +́ +n +o +õ +ɑ +k +f +ṍ +ḿ +ø +i +á +- +c +ṹ +ń +ĩ +ŋ +í +̃ +ẽ +ñ +h +û +e +u +m +a +s +̀ +p diff --git a/models/spa/G_100000.pth b/models/spa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..32705731825470d2cc347c71bb071172f153e32a --- /dev/null +++ b/models/spa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e0fc080bd1de68c2d74678c33c68307e156b9ad2cc039c1c6f41b46b58c833 +size 145490007 diff --git a/models/spa/config.json b/models/spa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/spa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/spa/vocab.txt b/models/spa/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..759ed624158f34d7fc0054ac6ceaf1cd492c43d9 --- /dev/null +++ b/models/spa/vocab.txt @@ -0,0 +1,45 @@ +7 +a +v +c +— +0 +5 +ó +8 +p +y +z +4 +m +ü +k +s +á +q +h +n +é +_ +9 +1 +f +t + +x +d +í +b +3 +j +g +l +2 +i +u +e +ú +o +ñ +r +6 diff --git a/models/spp/G_100000.pth b/models/spp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2fd080042c7c36650ca89e28b98d1c8a1a0a08cb --- /dev/null +++ b/models/spp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097de6f1b114b9ed26b684b81910cf06cb556e3ece9d818230a782be3ed30e22 +size 145494485 diff --git a/models/spp/config.json b/models/spp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/spp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/spp/vocab.txt b/models/spp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cee84f3a310dc530b764c6c7fa6205b1ea532013 --- /dev/null +++ b/models/spp/vocab.txt @@ -0,0 +1,51 @@ +| +i +a +n +y +e +u +k +m +p +r +ɛ +s +w +l +à +ŋ +ɲ +h +o +t +g +á +ɔ +b +c +f +' +j +ù +í +ì +è +̀ +d +ú +‐ +z +é +ò +î +́ +û +ê +â +̂ +v +ó +ô +- + diff --git a/models/spy/G_100000.pth b/models/spy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8fd56e749c472ed05a1b784d6f7d50d451b48797 --- /dev/null +++ b/models/spy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e024cf1b44bdbc565fd0c10c2a1c51d5cd070be9b7eae0b3b73832dc259bda2 +size 145478389 diff --git a/models/spy/config.json b/models/spy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/spy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/spy/vocab.txt b/models/spy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..26531bed0685e6a1cdb6cd35c8c15335a81434c9 --- /dev/null +++ b/models/spy/vocab.txt @@ -0,0 +1,30 @@ +| +k +o +ē +i +y +u +a +n +t +e +m +ā +ō +b +c +h +r +s +l +w +' +g +- +f +1 +2 +6 +0 + diff --git a/models/sqi/G_100000.pth b/models/sqi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f0ff10f65ca0257ea4d061e56a0f0451d7212de1 --- /dev/null +++ b/models/sqi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa694eab0c15f4535b90848d67f4400064e2d8499f31215eec40466c62200499 +size 145481476 diff --git a/models/sqi/config.json b/models/sqi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sqi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sqi/vocab.txt b/models/sqi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b89d09bed6069944c71db2a4d475bc95000e7b2f --- /dev/null +++ b/models/sqi/vocab.txt @@ -0,0 +1,34 @@ +| +ë +e +t +i +r +a +n +s +h +j +u +d +o +m +p +k +l +g +b +q +v +z +f +y +ç +' +c +x +- +è +– +à + diff --git a/models/sri/G_100000.pth b/models/sri/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ae376cffbe56b03e76c6dea10a5ea1d2893ae95 --- /dev/null +++ b/models/sri/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d657768a38b670df1ad9031e76299261c04ce411f81f4b8c8381bf7a438cc21 +size 145491443 diff --git a/models/sri/config.json b/models/sri/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sri/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sri/vocab.txt b/models/sri/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dede8983d4aa7cf3e8f2019c71bb29f557d80e1d --- /dev/null +++ b/models/sri/vocab.txt @@ -0,0 +1,47 @@ +| +r +a +ã +e +i +ʉ +m +ĩ +g +s +o +u +p +k +̃ +b +d +y +ẽ +j +õ +t +w +n +ũ +ñ +í +́ +á +é +ú +c +l +— +ó +h +f +v +ü +' +ṍ +̶ +z +q +x + diff --git a/models/srm/G_100000.pth b/models/srm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2808530c2995f1ccc6be2400e8791b63783a778 --- /dev/null +++ b/models/srm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b07c989be573e59a0ba6abf26854bcc8c0390b1016f7e792d253116f449aac +size 145483759 diff --git a/models/srm/config.json b/models/srm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/srm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/srm/vocab.txt b/models/srm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..38451ab62f4c14e3b24cfe35f896887b50cf3010 --- /dev/null +++ b/models/srm/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +n +u +d +e +ë +s +ö +t +k +o +m +b +f +g +w +h +l +j +p +˻ +˼ +z +' +- +v +0 +ú +1 +2 +4 +í +6 +r + diff --git a/models/srn/G_100000.pth b/models/srn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43211d978ab3a0728395d49feda79d2df7cc77e0 --- /dev/null +++ b/models/srn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996fe08cb0801107589a2ba3669cee24afb4a0a460321b5ab35346cd827fe551 +size 145482211 diff --git a/models/srn/config.json b/models/srn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/srn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/srn/vocab.txt b/models/srn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..47c3a0031b3f58a6c1173e1b72f1ff004d1c6f8d --- /dev/null +++ b/models/srn/vocab.txt @@ -0,0 +1,35 @@ +| +a +n +i +e +u +s +d +o +r +k +t +m +f +b +g +w +p +l +y +' +è +h +- +ê +v +ô +0 +j +1 +2 +z +á +c + diff --git a/models/srx/G_100000.pth b/models/srx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9649a07655d553dcf3bd47a6eca5d68a83fe101b --- /dev/null +++ b/models/srx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d48eecad60cf838466185b6ddac765a53638948be76c2ee8fd19bfd48406e42 +size 145504503 diff --git a/models/srx/config.json b/models/srx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/srx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/srx/vocab.txt b/models/srx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..72ca46d79d12d84d58fabb8561e2b698e82b1f23 --- /dev/null +++ b/models/srx/vocab.txt @@ -0,0 +1,64 @@ +| +ै +ा +ौ +र +क +त +ी +स +द +ि +ु +म +ज +ब +ं +ो +श +प +न +व +य +ल +ण +औ +े +ह +् +ए +आ +ख +थ +ू +ग +़ +ऐ +च +ई +ड +ट +भ +इ +ध +छ +अ +फ +ओ +ठ +ऊ +‍ +ढ +ष +घ +झ +ँ +- +उ +ञ +' +ृ +0 +4 +1 + diff --git a/models/stn/G_100000.pth b/models/stn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b24d8afa25453dcd9c841398de0062583e5d7da3 --- /dev/null +++ b/models/stn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd76009750a6d9a08963ac6d8bf903461bef7cb694d9f4e49e61907b4a4dea52 +size 145476840 diff --git a/models/stn/config.json b/models/stn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/stn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/stn/vocab.txt b/models/stn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..040dc683e53e1379ede546841614ded26f360331 --- /dev/null +++ b/models/stn/vocab.txt @@ -0,0 +1,28 @@ +a +| +i +n +r +o +k +g +m +u +e +t +f +w +s +p +q +h +d +j +l +b +v +' +5 +3 +9 + diff --git a/models/stp/G_100000.pth b/models/stp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..78fba2fcf852918aa9a4ca775d35d4877492d853 --- /dev/null +++ b/models/stp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba1eac50f4e120b4eb35712d38df2dbb304757fa4f8056fe87a84f945f918cf +size 145476713 diff --git a/models/stp/config.json b/models/stp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/stp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/stp/vocab.txt b/models/stp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d216ab69f97d6769955719221a1a1d3dbf9ae03a --- /dev/null +++ b/models/stp/vocab.txt @@ -0,0 +1,28 @@ +| +a +' +i +u +m +j +h +n +ɨ +g +k +t +b +d +p +o +x +c +ñ +r +s +l +e +ɇ +y +f + diff --git a/models/suc/G_100000.pth b/models/suc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a796cf5d8640719cc152cb896f5f27e143d962b1 --- /dev/null +++ b/models/suc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a158faea54ad2dad5e488403a5272d3bd957479e9f95c4ad0b49da9e1da680d4 +size 145475294 diff --git a/models/suc/config.json b/models/suc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/suc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/suc/vocab.txt b/models/suc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a0abcae22e3bc3f3a23294b3e7534ada5864316e --- /dev/null +++ b/models/suc/vocab.txt @@ -0,0 +1,26 @@ +| +o +n +a +g +i +u +m +k +t +s +l +d +ꞌ +p +b +y +w +r +e +h +' +— +0 +5 + diff --git a/models/suk/G_100000.pth b/models/suk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..375596ab9ecafbb21c7dd51e17b0b7b864e90fb3 --- /dev/null +++ b/models/suk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6abe73d99c46628b875498f4dd8e2f67534a4d019cdd6023ec815e4a262e74cd +size 145483754 diff --git a/models/suk/config.json b/models/suk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/suk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/suk/vocab.txt b/models/suk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..534e248c09a4d609c8740f9015705ad4f9c1ce43 --- /dev/null +++ b/models/suk/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +ū +l +ī +g +e +i +o +b +k +w +h +m +y +u +s +t +j +' +d +z +p +f +c +ò +- +v +ó +à +è +0 +1 +2 +ì + diff --git a/models/sun/G_100000.pth b/models/sun/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0f79f47390ac477660cdb68993a08281fca3c579 --- /dev/null +++ b/models/sun/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10bdde1d04b5d554d15cf46ebe6fdf8408ab9187e4af1d7a69206488f251cdc +size 145483759 diff --git a/models/sun/config.json b/models/sun/config.json new file mode 100644 index 0000000000000000000000000000000000000000..1f20c1e349fa34cb5c4ec81962ddafa6026954e0 --- /dev/null +++ b/models/sun/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 48, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sun/vocab.txt b/models/sun/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..762cb1cef0b1f081ffb13b441dcfcc9544fddf84 --- /dev/null +++ b/models/sun/vocab.txt @@ -0,0 +1,37 @@ +a +| +n +u +e +i +k +g +r +t +s +l +h +m +d +p +b +j +y +o +w +- +c +' +f +0 +2 +6 +1 +5 +4 +7 +3 +8 +z +9 + diff --git a/models/sur/G_100000.pth b/models/sur/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..051687dae56e0d9edbdbf2235a548a37bd8e9e06 --- /dev/null +++ b/models/sur/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d1e4f6fd1fd6f3385f985b8f841637af0a43a3329bae99cb1895ca7a89ebbd +size 145485305 diff --git a/models/sur/config.json b/models/sur/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sur/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sur/vocab.txt b/models/sur/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..df2141a41470afa4443bec1cfa6f20cc6c96c82a --- /dev/null +++ b/models/sur/vocab.txt @@ -0,0 +1,39 @@ +| +i +a +n +e +m +u +k +o +s +r +ɗ +w +g +̶ +t +p +h +l +y +ɓ +b +j +d +f +̵ +ì +c +z +` +í +á +- +à +v +' +ú +é + diff --git a/models/sus/G_100000.pth b/models/sus/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4f2fbfe364398c36e5214bf6053f336e463a85c --- /dev/null +++ b/models/sus/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b463355200642a19a36692af301a6b74482fcf47d118ba5633dee3eb225c9a5 +size 145476083 diff --git a/models/sus/config.json b/models/sus/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sus/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sus/vocab.txt b/models/sus/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e3919428715857ad325cdd88e787ebb940e066b5 --- /dev/null +++ b/models/sus/vocab.txt @@ -0,0 +1,27 @@ +| +a +n +i +x +e +m +ɛ +r +u +b +o +l +ɔ +s +y +t +f +k +d +w +g +ɲ +h +p +- + diff --git a/models/suv/G_100000.pth b/models/suv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d381e071f233f1b6d57083404080f0385c6d188a --- /dev/null +++ b/models/suv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1a7f7d7c40a41368ad64c1c46e7c395e4c13fd3fe12b7905e41574ec4976ed8 +size 145480690 diff --git a/models/suv/config.json b/models/suv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/suv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/suv/vocab.txt b/models/suv/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..4a08ce0e4e021f97d32c52be753dbc089fdda9df --- /dev/null +++ b/models/suv/vocab.txt @@ -0,0 +1,33 @@ +a +| +i +n +e +o +u +r +t +k +g +b +l +j +ë +s +h +d +p +m +c +y +w +ü +f +' +v +z +ǃ +ː +̈ +- + diff --git a/models/suz/G_100000.pth b/models/suz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..352f730364b0fe73fbea1eae7c5b173a74763705 --- /dev/null +++ b/models/suz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:549e3465832d88d53099754bae6e8dfd7308d1b2cd8b5123fb9b7386afb2b00f +size 145506775 diff --git a/models/suz/config.json b/models/suz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/suz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/suz/vocab.txt b/models/suz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..36b9dc367e78136cfe7ac63d9bfbde44b1e290a8 --- /dev/null +++ b/models/suz/vocab.txt @@ -0,0 +1,67 @@ +आ +‍ +ग +ठ +थ +़ +ै +१ +० +म +ा +य +क +त +श +ब +च +ढ +७ +घ +ह +ओ +ल +र +छ +द +_ + +ो +४ +इ +ौ +औ +ं +् +२ +८ +' +ि +३ +ज +प +ख +ध +ऊ +ऐ +झ +५ +भ +ु +ए +६ +न +अ +े +९ +व +उ +ट +ई +ड +- +ी +ू +फ +स +ङ diff --git a/models/swe/G_100000.pth b/models/swe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c34bb457f5a50e8a98d48f378212b639e4a1152c --- /dev/null +++ b/models/swe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e088ba36cb8bdc1e97198477f11cef8a2e0c14dba99047237bc301b46739aff +size 145483657 diff --git a/models/swe/config.json b/models/swe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/swe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/swe/vocab.txt b/models/swe/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6ebc23970f4e0a6f1366c2b428e68f7b34bc5c4 --- /dev/null +++ b/models/swe/vocab.txt @@ -0,0 +1,37 @@ +x +e +b +o +l +r +– +j +a +7 +n +m +2 +y + +d +é +s +h +ö +_ +g +' +c +u +- +z +q +f +p +i +6 +å +t +ä +k +v diff --git a/models/swh/G_100000.pth b/models/swh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8565cf9c4b241eb024e504b65e610e07cdf9c6c5 --- /dev/null +++ b/models/swh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a29f04df5a4355cde4cf1ade9d14c1270432cdad731e00c5a8fb10a35db86235 +size 145485169 diff --git a/models/swh/config.json b/models/swh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/swh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/swh/vocab.txt b/models/swh/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..cf11713b3ace09a6de26f2dc165ebccbf5a0b46f --- /dev/null +++ b/models/swh/vocab.txt @@ -0,0 +1,39 @@ +2 +m +d +r +j +l +s +8 +c +- +k +e +p +0 +g +u +w +7 +4 +b +f +v +i +1 +3 +o +t +6 +z +a +y +5 +n +q + +ʼ +' +h +_ diff --git a/models/sxb/G_100000.pth b/models/sxb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dedc529dc4795cca764c0799f2137d92a8cf7d32 --- /dev/null +++ b/models/sxb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2dff9b2241a79e284d8b948c7306f7e7efa3f8da9b2bff65a5b5e6f9f01754 +size 145479902 diff --git a/models/sxb/config.json b/models/sxb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sxb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sxb/vocab.txt b/models/sxb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8c94fe22ef7839bf0407e57475e9240cdad6b95a --- /dev/null +++ b/models/sxb/vocab.txt @@ -0,0 +1,32 @@ +r +2 +0 +p +y +t +k +b +m +c +v +h +o +i +- +e + +j +4 +d +z +g +' +n +f +s +u +a +l +1 +w +_ diff --git a/models/sxn/G_100000.pth b/models/sxn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fe8d0b3f7c52fc2aeb5d947278ad4c84a1efa871 --- /dev/null +++ b/models/sxn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d839a40b6af72bd496487dcf3dd5cfb6e54dd1df82a30a9f1be41663a0029d0 +size 145486847 diff --git a/models/sxn/config.json b/models/sxn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sxn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sxn/vocab.txt b/models/sxn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..55ca6581a3a8a3a4df42ed932d69243cd908d283 --- /dev/null +++ b/models/sxn/vocab.txt @@ -0,0 +1,41 @@ +| +a +n +e +u +i +g +k +s +m +t +r +l +ẹ +ě +o +̌ +p +w +h +ạ +b +d +᷊ +ụ +ọ +ị +' +y +- +j +f +z +c +0 +ú +4 +1 +ó +2 + diff --git a/models/sya/G_100000.pth b/models/sya/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2df81e32ebebfc32213c03c7bd6a326b67e17fcb --- /dev/null +++ b/models/sya/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1965a25ffca6b0d76441d0366405ddd39f92c874ddce8ca7e287799afffbc550 +size 145475211 diff --git a/models/sya/config.json b/models/sya/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sya/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sya/vocab.txt b/models/sya/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c369d02e32b42600e0c94729848fc90f354b7f3d --- /dev/null +++ b/models/sya/vocab.txt @@ -0,0 +1,26 @@ +a +j +' +o +c +m +h +b +– +l +u +t +y +e +- +_ +r +w +s +k +i +p + +n +d +g diff --git a/models/syl/G_100000.pth b/models/syl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7401342fae924ab8002063b8a4c3eca7c82c8303 --- /dev/null +++ b/models/syl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b19c9fbd5a97cd0da1d022555bc59d8dc91708f438103e97a8e6c053fde77d5 +size 145479904 diff --git a/models/syl/config.json b/models/syl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/syl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/syl/vocab.txt b/models/syl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ecf62874f836a6526f2310d10f46d9a1879988e2 --- /dev/null +++ b/models/syl/vocab.txt @@ -0,0 +1,32 @@ +| +a +o +i +r +e +n +t +u +l +m +s +ḳ +b +h +d +z +k +g +f +ṭ +- +ṛ +c +ḍ +j +p +y +ʼ +' +— + diff --git a/models/sza/G_100000.pth b/models/sza/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d4849c882fb6275b22d90cc82eda03fba08322ff --- /dev/null +++ b/models/sza/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa589455cf81c9d975f1caa096d5b6fb3f75c7bfa1b43dd6745b52e428b1006 +size 145475408 diff --git a/models/sza/config.json b/models/sza/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/sza/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/sza/vocab.txt b/models/sza/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..60f352f905c660d294f341851ef8b794fd5357da --- /dev/null +++ b/models/sza/vocab.txt @@ -0,0 +1,26 @@ +w +r +j +c +_ +h +n +– +u +o +t +' +d +p +b +k +g +a + +m +y +- +i +l +e +s diff --git a/models/tac/G_100000.pth b/models/tac/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb0f965fb57da334c3c08735e554acdd77eaa101 --- /dev/null +++ b/models/tac/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0eb06ee47d3af0a2933322be0f048e290ca5756a59bb865ebb8f9608956ce3 +size 145481435 diff --git a/models/tac/config.json b/models/tac/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tac/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tac/vocab.txt b/models/tac/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..709b56ff9b9aba6223d3b9c6f7a6789a4c2c7b36 --- /dev/null +++ b/models/tac/vocab.txt @@ -0,0 +1,34 @@ +n +e +ó +i +d +b +v +s +á +r + +p +h +a +í +é +l +f +ñ +_ +y +t +k +— +c +u +w +' +o +g +j +ú +- +m diff --git a/models/taj/G_100000.pth b/models/taj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a267fd5a88fcf420a1fd3597876ebe3db6844751 --- /dev/null +++ b/models/taj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa568d9a2e0f44738a1bcd72b0cbc81fac47a3e5fa9618e07956ed6528454781 +size 145503723 diff --git a/models/taj/config.json b/models/taj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/taj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/taj/vocab.txt b/models/taj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8fcf6eee2d513f23de056dc6e3af41d63c670ada --- /dev/null +++ b/models/taj/vocab.txt @@ -0,0 +1,63 @@ +| +ा +् +े +‍ +न +म +र +ल +स +ी +ि +द +ब +ह +ग +ङ +ु +य +ो +त +प +ज +थ +च +भ +श +व +क +ख +छ +इ +आ +ए +ू +ध +ै +ओ +ट +ः +फ +अ +उ +ष +घ +झ +ठ +ड +ढ +ण +ँ +ञ +ऊ +ं +ौ +' +ई +ऐ +६ +- +v +औ + diff --git a/models/tam/G_100000.pth b/models/tam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c834f6d178849640da5239539d67a444587f9418 --- /dev/null +++ b/models/tam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbade8e2c8442db96515d30edadebf532b63eb595e4da5ffb60cc233dd2896e1 +size 145499887 diff --git a/models/tam/config.json b/models/tam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tam/vocab.txt b/models/tam/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..491a1a901fea2a2edefbf621223b529c2264d881 --- /dev/null +++ b/models/tam/vocab.txt @@ -0,0 +1,58 @@ +3 +அ +ஈ +உ +_ +ந +க + +ே +ா +ெ +ஊ +ீ +' +7 +a +ஏ +வ +் +ஹ +ூ +ல +ட +2 +ழ +ர +ம +ஒ +ற +ஞ +ச +9 +ி +ஸ +ை +ன +6 +எ +இ +ய +ௌ +த +ோ +ள +1 +ஆ +ப +0 +ண +ஓ +ங +ு +ஐ +ஜ +4 +ஷ +ொ +5 diff --git a/models/tao/G_100000.pth b/models/tao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf9f0930a149e00e36a4e6ae35ccbc785965681b --- /dev/null +++ b/models/tao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d539fe55039ed6c65774917b17a0dba5d9d0351905acac8f611231fc76278e8 +size 145475301 diff --git a/models/tao/config.json b/models/tao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tao/vocab.txt b/models/tao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ca084997b349d65c3702dda19cda235c25f1ea97 --- /dev/null +++ b/models/tao/vocab.txt @@ -0,0 +1,26 @@ +w +k +n +- +a +t +l +r +_ +p +z +o +y +g +i +' +v +s +h +c +j +m +d +b + +e diff --git a/models/tap/G_100000.pth b/models/tap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d4f84976804a928bfd1f72a2d0bdb9e6c6ed47d --- /dev/null +++ b/models/tap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddb1628f4753958df2e6ae08eb54b6ca001ce01cc55b90e1ee400e5d7dd8d157 +size 145482967 diff --git a/models/tap/config.json b/models/tap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tap/vocab.txt b/models/tap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50bdadd04755245588aa5ce49881aba15b702890 --- /dev/null +++ b/models/tap/vocab.txt @@ -0,0 +1,36 @@ +w +g +a + +4 +z +2 +s +b +l +k +d +m +v +0 +n +7 +6 +_ +o +y +t +c +u +5 +' +h +p +3 +r +1 +e +i +9 +8 +f diff --git a/models/taq/G_100000.pth b/models/taq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ae703a7f8a86d87ece19b288350468f6f94db93 --- /dev/null +++ b/models/taq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:080dac44526643b40a6f0a60c6c079f2cc0c8390defb8a702fd36016d8974bac +size 145484624 diff --git a/models/taq/config.json b/models/taq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/taq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/taq/vocab.txt b/models/taq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ce22c4f87ac1bc76185dd9acb62d57640825a10d --- /dev/null +++ b/models/taq/vocab.txt @@ -0,0 +1,38 @@ +w +t +f +ẓ +ă +q +ž +ṣ +ǝ +ĕ +j +' +k +ṭ +- +š +a +ɣ +h +l +e +c +_ +m +n +b +u +i +r +s +d +x +y +ḍ +z +o +g + diff --git a/models/tat/G_100000.pth b/models/tat/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e73e6674e1a990e2c8262f5f3e6b3670b3d01c0 --- /dev/null +++ b/models/tat/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8ed58fbf11ffc248dacf49cc83597e09fb631c82841771be24488c32076ed9b +size 145488395 diff --git a/models/tat/config.json b/models/tat/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tat/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tat/vocab.txt b/models/tat/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7719017c77b6625da0b6b32c72cff35b038cb88a --- /dev/null +++ b/models/tat/vocab.txt @@ -0,0 +1,43 @@ +| +а +е +ә +н +л +ы +р +к +т +и +г +м +б +д +с +у +з +ш +ч +ү +њ +й +п +ћ +μ +х +я +о +– +ђ +в +ф +э +ь +ъ +ю +- +ѳ +ж +ц +1 + diff --git a/models/tav/G_100000.pth b/models/tav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9e0322324006b58d397b40466747cdd0d6579774 --- /dev/null +++ b/models/tav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc94a1eacdc25b1e974f24cf27e7316179c9a759f931ce76b3a38fd0d895853 +size 145488391 diff --git a/models/tav/config.json b/models/tav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tav/vocab.txt b/models/tav/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..de7b0348e31e06c20a8849f43c235667b9636580 --- /dev/null +++ b/models/tav/vocab.txt @@ -0,0 +1,43 @@ + +ã +é +f +n +á +— +r +g +p +õ +_ +v +z +y +s +i +a +k +q +ñ +- +ó +í +l +ʉ +h +c +m +ẽ +ĩ +e +o +j +́ +d +b +ũ +w +u +t +̃ +ú diff --git a/models/tbc/G_100000.pth b/models/tbc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9dcd8c9f16cabf7e9255ead8dce580b4837fd023 --- /dev/null +++ b/models/tbc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca13add5de9514211f0618ca926f611b76968de5130394fa961ad5ec28af150 +size 145483773 diff --git a/models/tbc/config.json b/models/tbc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tbc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tbc/vocab.txt b/models/tbc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..adb20975e205ddcbd8bd4e5ec23f0d7c71ede1f9 --- /dev/null +++ b/models/tbc/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +n +u +o +d +e +m +l +t +g +ŋ +s +k +p +r +b +y +w +f +j +h +v +0 +1 +2 +' +4 +7 +5 +3 +6 +9 +- +8 + diff --git a/models/tbg/G_100000.pth b/models/tbg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..21e1d6f7e0f0c868ebabf2f4354a6926fcd28d9b --- /dev/null +++ b/models/tbg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a3e6081415987c4ae79b07d71fa95c4f0c7c05b9ec254a330d04177ab0415c +size 145486163 diff --git a/models/tbg/config.json b/models/tbg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tbg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tbg/vocab.txt b/models/tbg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2c814f1ee3fcb016dccf8b36d7f0b86534ed4b9f --- /dev/null +++ b/models/tbg/vocab.txt @@ -0,0 +1,40 @@ +h +k +u +3 +4 +- +y +á +6 +5 +o +t +q +v +m +˼ +— +i +g +7 +l +­ +b +d +8 +1 +r +2 +n +a +s +9 + +_ +e +f +p +j +0 +˻ diff --git a/models/tbk/G_100000.pth b/models/tbk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..528ca5b68e80189b14a27ebb7539ba550a5a6c9f --- /dev/null +++ b/models/tbk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a63a01341a13f5b50230505ec45c8991b08029167783a6beaec7787cad5ee5d +size 145489235 diff --git a/models/tbk/config.json b/models/tbk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tbk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tbk/vocab.txt b/models/tbk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..41ddf9b7cbd00416e3fb3b1227794db7b1341202 --- /dev/null +++ b/models/tbk/vocab.txt @@ -0,0 +1,44 @@ +a +| +n +g +u +i +t +m +y +p +s +e +k +r +d +l +̱ +b +w +o +- +j +c +h +f +z +v +q +' +x +1 +̠ +̓ +ë +2 +3 +4 +5 +6 +7 +8 +9 +0 + diff --git a/models/tbl/G_100000.pth b/models/tbl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e081afdc7e880812d5fcb13864c7955dc5c5332a --- /dev/null +++ b/models/tbl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e4082a697edcceda3dc194fed69f5fe84318b9993da6d1896f948e1d7824992 +size 145482993 diff --git a/models/tbl/config.json b/models/tbl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tbl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tbl/vocab.txt b/models/tbl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3909cbe5e37e9fc757b76dd1e55166fe22b1305a --- /dev/null +++ b/models/tbl/vocab.txt @@ -0,0 +1,36 @@ +ò +ó +j +- + +t +m +k +f +d +' +r +ù +s +w +g +é +ì +à +o +h +ê +e +p +b +i +y +n +è +l +a +u +6 +_ +c +ô diff --git a/models/tby/G_100000.pth b/models/tby/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8844cde4f0cf9bbb6d215b807d4d22b3ffd2370e --- /dev/null +++ b/models/tby/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af782b8b6b5f73319eeec7d50f567ad3c615c7a2ff56d01eb96f77a507ecdd34 +size 145484523 diff --git a/models/tby/config.json b/models/tby/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tby/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tby/vocab.txt b/models/tby/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8eb587646502fc05489b2f341dd29d77179634d5 --- /dev/null +++ b/models/tby/vocab.txt @@ -0,0 +1,38 @@ +| +a +o +i +n +e +u +' +g +k +m +s +d +t +w +r +y +l +b +- +p +j +h +f +c +0 +2 +8 +z +7 +1 +4 +3 +5 +– +6 +9 + diff --git a/models/tbz/G_100000.pth b/models/tbz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1142fa601e1bf294282bea4570bb36daa40bcacd --- /dev/null +++ b/models/tbz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b19cd493f1b7ae8621c89c6b5156b2692b37d28b971cade6117dd09705a452b4 +size 145489909 diff --git a/models/tbz/config.json b/models/tbz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tbz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tbz/vocab.txt b/models/tbz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c7c33d71a0288ef3c9de84f02dfecc6a228b8033 --- /dev/null +++ b/models/tbz/vocab.txt @@ -0,0 +1,45 @@ +| +ɛ +̀ +k +n +i +ɑ +t +m +́ +d +b +ɔ +u +y +ì +o +̃ +e +í +s +p +ò +ù +r +ó +w +è +ú +c +ĩ +f +h +é +ũ +ǹ +ń +a +ṹ +ḿ +ã +á +- +v + diff --git a/models/tca/G_100000.pth b/models/tca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..43ea0ef295f2415ccd9d7c8032e39a091e7e6120 --- /dev/null +++ b/models/tca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665f492292b323f4cc44f127021b40576e64edfb375831174228f580b6a86e28 +size 145503095 diff --git a/models/tca/config.json b/models/tca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tca/vocab.txt b/models/tca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5dcaec022b03e25d9cb86e47d19c4b4402e826b3 --- /dev/null +++ b/models/tca/vocab.txt @@ -0,0 +1,62 @@ +| +a +ü +x +n +g +r +u +̃ +c +e +m +i +t +ẽ +h +y +p +ã +̱ +o +w +ĩ +d +ñ +á +í +— +́ +é +ũ +ú +õ +b +s +ó +ṯ +q +ṉ +0 +ḏ +ṍ +1 +l +7 +ḇ +ǘ +ṹ +2 +z +4 +f +- +5 +' +6 +ẖ +8 +3 +9 +k + diff --git a/models/tcc/G_100000.pth b/models/tcc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..40d94609326ab1a307ae38abb68a0d9616ed4e89 --- /dev/null +++ b/models/tcc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6b05d619c2a6e49a0f4a72895c1d97289861ed108d504ed2c040212202a6f1f +size 145475319 diff --git a/models/tcc/config.json b/models/tcc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tcc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tcc/vocab.txt b/models/tcc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..78f7d26abaec833cdfe242994c94f5a2848329df --- /dev/null +++ b/models/tcc/vocab.txt @@ -0,0 +1,26 @@ +l +- +_ +s +h +j +' +p +k +t +o +i +u +b +n +e +m +w +c +a +g +r +f +d + +y diff --git a/models/tcs/G_100000.pth b/models/tcs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a455f756d12e03d849f7cd93b9c553ebeb960262 --- /dev/null +++ b/models/tcs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76c38db8f6dec304700947adc4dd5ab02ea9a29f683f4f434a283898b361b8e +size 145472231 diff --git a/models/tcs/config.json b/models/tcs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tcs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tcs/vocab.txt b/models/tcs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6cbb056bfa791ede8339eca50fde847a8eda61ff --- /dev/null +++ b/models/tcs/vocab.txt @@ -0,0 +1,22 @@ +| +a +i +e +n +o +m +p +d +l +b +s +t +u +k +g +w +r +z +y +' + diff --git a/models/tcz/G_100000.pth b/models/tcz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f9320836074c8133db77bb405e1b9d18b9be25f --- /dev/null +++ b/models/tcz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e55bd8c36390a2dd8279a5297849b28a86992fa0b3167119483754eb16d50b4 +size 145488353 diff --git a/models/tcz/config.json b/models/tcz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tcz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tcz/vocab.txt b/models/tcz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c2b826fc2bf3ccaf1259a240bc6f32437ec9244a --- /dev/null +++ b/models/tcz/vocab.txt @@ -0,0 +1,43 @@ +| +a +n +i +g +h +u +t +m +k +r +w +l +e +c +s +p +â +d +o +z +v +ê +b +' +î +- +j +û +ā +ū +á +f +ē +í +é +ī +ó +ȇ +` +ō +ä + diff --git a/models/tdj/G_100000.pth b/models/tdj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d9cb1e95a2de3581ee3b891b3646080a99f23b49 --- /dev/null +++ b/models/tdj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142d4c1c285cc5612e788c36f51a8eb1c1896a4d1e2d8e9555dc5448b25ebb70 +size 145476065 diff --git a/models/tdj/config.json b/models/tdj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tdj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tdj/vocab.txt b/models/tdj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b6cd955972018b27ef242cb852960f8e5d19ad23 --- /dev/null +++ b/models/tdj/vocab.txt @@ -0,0 +1,27 @@ +y + +i +u +t +h +o +g +j +e +r +l +m +c +w +' +p +- +_ +v +a +ꞌ +k +s +n +b +d diff --git a/models/ted/G_100000.pth b/models/ted/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ff393a4f2dd39b23daad861c3c2e1bd593d59892 --- /dev/null +++ b/models/ted/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09aaf4f7967c1b93694a7449cdb73ea87ca8fa011f1215823d9452b5272be3e +size 145480687 diff --git a/models/ted/config.json b/models/ted/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ted/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ted/vocab.txt b/models/ted/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f379d8fec2f6c306f1d0fb94fdde5ce36bd63ac3 --- /dev/null +++ b/models/ted/vocab.txt @@ -0,0 +1,33 @@ +| +' +n +a +ʋ +‐ +ɛ +ɔ +e +ɩ +l +y +u +i +k +b +m +o +t +h +꞊ +d +w +g +s +p +r +c +j +f +ŋ +1 + diff --git a/models/tee/G_100000.pth b/models/tee/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..59e9cd28374ce01fec5a29df28471edce266c011 --- /dev/null +++ b/models/tee/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19472bdc72d7247a5fd9156e2a9fd1ab8eae0f604d7b701906aed8ffe5dd9b03 +size 145481466 diff --git a/models/tee/config.json b/models/tee/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tee/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tee/vocab.txt b/models/tee/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f3ecd386596e0dc816d522e20b3f891b70386988 --- /dev/null +++ b/models/tee/vocab.txt @@ -0,0 +1,34 @@ +ú +í +x +k +o +b +ó +e +y +m +n +h +p +á + +g +ʼ +a +t +s +z +v +i +é +c +ñ +d +f +l +u +j +_ +q +r diff --git a/models/tel/G_100000.pth b/models/tel/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..396b88f40bf29474ac37a0c63f32de15ac74386e --- /dev/null +++ b/models/tel/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2d8809c211ef5b0d73539228ea8125fed443c9419491e04d25140c9e2f0509 +size 145505369 diff --git a/models/tel/config.json b/models/tel/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tel/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tel/vocab.txt b/models/tel/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..50e4c53accd063e2b5d353ebfa5f5b0531975ebb --- /dev/null +++ b/models/tel/vocab.txt @@ -0,0 +1,65 @@ +త +ఞ +భ +థ +ర +న +ో +ఘ +' +ఐ +గ +ే +జ +ఔ +హ +ా +డ +మ +స + +క +ఒ +ూ +వ +ధ +శ +ౌ +ప +ః +ఠ +ఖ +ష +– +ణ +ఫ +ి +ొ +ఏ +ఎ +ృ +ఓ +య +అ +ీ +ె +_ +చ +ు +్ +ద +ల +ఉ +ఢ +- +బ +ఈ +ట +ఛ +ళ +ఊ +6 +ం +ఇ +ఆ +ై diff --git a/models/tem/G_100000.pth b/models/tem/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12f39186822786dd94b7902e399a0854dd72121a --- /dev/null +++ b/models/tem/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619c920e3e58a56833481963da15a1104e7882cc45495f25cb86e9b8b040a2ec +size 145478387 diff --git a/models/tem/config.json b/models/tem/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tem/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tem/vocab.txt b/models/tem/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c064ce05d4ae677bb225ace02ecab92528484ced --- /dev/null +++ b/models/tem/vocab.txt @@ -0,0 +1,30 @@ +| +a +k +ɔ +i +t +ŋ +m +ə +r +n +ɛ +ȧ +s +y +e +b +u +o +h +l +p +w +f +- +d +ǝ +g +c + diff --git a/models/teo/G_100000.pth b/models/teo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca0313a8bf14c19cfd4bc6bc9a977c29ee8d1a19 --- /dev/null +++ b/models/teo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:096dbca8d973df082d6aa163ce92559f537346c9aaa0e1f77eccfb29f9a60d0c +size 145476203 diff --git a/models/teo/config.json b/models/teo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/teo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/teo/vocab.txt b/models/teo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..043fe2d09473d37a114d759cab18bfca0f64720e --- /dev/null +++ b/models/teo/vocab.txt @@ -0,0 +1,27 @@ +a +| +i +k +e +o +n +t +u +r +s +l +m +d +y +p +ŋ +b +j +w +c +g +' +- +— +– + diff --git a/models/ter/G_100000.pth b/models/ter/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8ffbd77c342fc8d308ccfa2673d434ff47d67f14 --- /dev/null +++ b/models/ter/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3143f71c5708b4b6b59b391d85125b63fc1cc0e6fb2338643f4129ae5e11600b +size 145485441 diff --git a/models/ter/config.json b/models/ter/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ter/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ter/vocab.txt b/models/ter/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3e05a3e777e2d29fce9607a98691de4811c46c35 --- /dev/null +++ b/models/ter/vocab.txt @@ -0,0 +1,39 @@ +| +o +k +e +i +n +a +t +u +p +v +h +y +x +ó +' +m +á +í +ú +s +é +â +r +ê +j +û +d +ô +l +g +— +‍ +z +b +î +f +- + diff --git a/models/tes/G_100000.pth b/models/tes/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..be1123256964be5ceaaaf377da88843cbf26dd2a --- /dev/null +++ b/models/tes/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75358c370eaf4eb4270e710b24e1ad6b44a298b5614255a92d9a4f611d6363e5 +size 145475341 diff --git a/models/tes/config.json b/models/tes/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tes/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tes/vocab.txt b/models/tes/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..11537f40e18c9e7debb6876f857a11fefd3caba7 --- /dev/null +++ b/models/tes/vocab.txt @@ -0,0 +1,26 @@ +k +f +a +s +l +r +p +t +- +' +h +j +g +m +c +u +b +_ +n +o +d +y + +i +w +e diff --git a/models/tew/G_100000.pth b/models/tew/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..65d56752f43e0c7f890be857d8fce250b8ed6711 --- /dev/null +++ b/models/tew/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7046c5a865097df9e19125b26b5749ac3a50bfb19cee3ba5605518053a433ebe +size 145492309 diff --git a/models/tew/config.json b/models/tew/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tew/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tew/vocab.txt b/models/tew/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..570b4bcb10cbe8253185a0de37024a7ae975ab37 --- /dev/null +++ b/models/tew/vocab.txt @@ -0,0 +1,48 @@ +n +ä +i +ú +é +̂ +z +- +f +k +s +ô +c +ḏ +_ +y +á +î +r +t +o +û +ˀ +g +í +̖ +e +w +v +b +p +́ +j +— +' +d +h +x +ꞌ +ê +q +a +m +l +â + +u +ó diff --git a/models/tex/G_100000.pth b/models/tex/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4099d2b2fea5ffe594ed6e06a5972f533058adfe --- /dev/null +++ b/models/tex/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:874780aea11528b450138bf12d708c40b645d785c46edbbcbd331d8b7e99f1fa +size 145479927 diff --git a/models/tex/config.json b/models/tex/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tex/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tex/vocab.txt b/models/tex/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..43e5de644c55f47dc4309a35f236894a381b6803 --- /dev/null +++ b/models/tex/vocab.txt @@ -0,0 +1,32 @@ +r +a +i +e +c +í +s +k +- +t +o +d +̱ +y +j +h +u +_ +l +w + +m +p +n +g +z +ṯ +á +' +ó +b +v diff --git a/models/tfr/G_100000.pth b/models/tfr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d02f28f9ac76161f232620b564fc7bfaea417485 --- /dev/null +++ b/models/tfr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca7944f10ac7b26ffaba1f37ae00a763fe98ef26144256e8f0fc9be1ae73c45 +size 145495267 diff --git a/models/tfr/config.json b/models/tfr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tfr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tfr/vocab.txt b/models/tfr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..646d4f7d43b7ed7943899e6815177a0a04488090 --- /dev/null +++ b/models/tfr/vocab.txt @@ -0,0 +1,52 @@ +| +o +a +k +g +j +e +š +n +r +s +i +b +ë +y +w +t +h +p +d +m +ö +ð +u +ä +î +õ +ã +ú +ê +c +z +l +í +é +û +f +ó +á +v +q +ò +ø +è +x +' +à +þ +ü +ï +ñ + diff --git a/models/tgj/G_100000.pth b/models/tgj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea20b0944ea8bb446032e1d80b1f97db216053cc --- /dev/null +++ b/models/tgj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1de4abaf04037efb24ed273344fc35596e663ec76924500788885aa128ffa10 +size 145481461 diff --git a/models/tgj/config.json b/models/tgj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tgj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tgj/vocab.txt b/models/tgj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d5011e179139e2741e463f541fe13d3e013cf010 --- /dev/null +++ b/models/tgj/vocab.txt @@ -0,0 +1,34 @@ +| +v +n +a +o +g +u +i +k +m +l +r +b +d +w +y +t +s +p +h +j +e +c +— +- +' +0 +4 +6 +1 +f +z +2 + diff --git a/models/tgk/G_100000.pth b/models/tgk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..13855691687aaa40a020ed2c600e162c40acfa32 --- /dev/null +++ b/models/tgk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4878819f88285bc5883831f45c1c305a8aa292b256ad63f5d1a59ac0fb7527 +size 145486817 diff --git a/models/tgk/config.json b/models/tgk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tgk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tgk/vocab.txt b/models/tgk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4e5c36d7373bf633d13a47b8f959171cde4683a3 --- /dev/null +++ b/models/tgk/vocab.txt @@ -0,0 +1,41 @@ +о +ҷ +б +к +я +т + +у +ҳ +ӯ +ч +е +ю +ц +з +м +в +п +ь +— +ф +ш +г +_ +ӣ +ж +э +ё +й +ғ +а +ъ +‐ +р +с +д +и +л +х +қ +н diff --git a/models/tgl/G_100000.pth b/models/tgl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f792a71702340fbd167d4e09d867ac785ebfa0ea --- /dev/null +++ b/models/tgl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df129aaf53ab5a0162cfabb7f9a7d45a7792f2dcc0051e160dba84a37bdcb2c9 +size 145488475 diff --git a/models/tgl/config.json b/models/tgl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tgl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tgl/vocab.txt b/models/tgl/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..731564886683fd535ce9f208a25aa2df50193259 --- /dev/null +++ b/models/tgl/vocab.txt @@ -0,0 +1,43 @@ +a +ʼ +x +g +- +o +m +s +i +u +5 +e +0 +z +v +6 +n +' +– +y +2 +f +c +p +r + +j +q +t +9 +b +d +1 +h +w +_ +l +3 +7 +8 +ñ +k +4 diff --git a/models/tgo/G_100000.pth b/models/tgo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..72cc1d9db1007cc0ed5a2af04f6e62ef8369244a --- /dev/null +++ b/models/tgo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63c677aa168ef923f80b3630adf1e2e6dcd89e4c7cc8f710c79e63f1802d38da +size 145480691 diff --git a/models/tgo/config.json b/models/tgo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tgo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tgo/vocab.txt b/models/tgo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..13df9bd4c73bfbe016b0d67fe602e93b8b110590 --- /dev/null +++ b/models/tgo/vocab.txt @@ -0,0 +1,33 @@ +_ +' +— +b +y +w +2 +f +j +v +6 +1 +- +o +a +ɨ +r +h +ŋ +i +d +m +g +p +l +t +k + +n +0 +e +u +s diff --git a/models/tgp/G_100000.pth b/models/tgp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3961bac96d7fbe308f908a13eed8b2cf6275135b --- /dev/null +++ b/models/tgp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:556ed60be8ef528f103528a2a2223fe13de72b1e807382d199153d81a8585c1b +size 145488392 diff --git a/models/tgp/config.json b/models/tgp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tgp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tgp/vocab.txt b/models/tgp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0483fa1bae692b468d1bd8dd7df7edfd20ead307 --- /dev/null +++ b/models/tgp/vocab.txt @@ -0,0 +1,43 @@ +| +a +i +o +n +e +r +m +l +t +u +h +s +v +p +̃ +k +g +j +d +b +c +w +1 +y +2 +z +' +4 +3 +6 +f +0 +5 +9 +8 +7 +x +– +— +q +- + diff --git a/models/tha/G_100000.pth b/models/tha/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9d984169957b6955edb1a6fa318deceb04b598ad --- /dev/null +++ b/models/tha/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:044ed047e447eb2c7793adeaae53e4398d092ff5640449f36fb0b8e2c3b711e8 +size 145509977 diff --git a/models/tha/config.json b/models/tha/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tha/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tha/vocab.txt b/models/tha/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..11de8465ce535569b832e707dd7174fc16795d4e --- /dev/null +++ b/models/tha/vocab.txt @@ -0,0 +1,71 @@ +า +น +่ +ร +เ +้ +อ +ง +ก +ว +ะ +ั +ม +ท +พ +ย +ล +จ +ี +ค +ต +ด +ห +ข +ิ +แ +ส +บ +ป +ไ +ู +ใ +็ +ื +์ +ช +ุ +ึ +ํ +โ +ผ +ถ +ญ +ซ +ธ +ศ +ณ +ษ +ฟ +ภ +ฉ +ฝ +ฐ +ฤ +ฏ +ฮ +ฆ +๋ +ฎ +' +0 +๊ +ฑ +1 +4 +2 +- +ฬ +ฒ +ฌ + diff --git a/models/thk/G_100000.pth b/models/thk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cec3684a686434223f233ff302af94d9a49d9484 --- /dev/null +++ b/models/thk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5081caaf29378455e82aa10bfcd0820bc087499e5f91c82c178cde2a1aa73c4d +size 145476951 diff --git a/models/thk/config.json b/models/thk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/thk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/thk/vocab.txt b/models/thk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b1a41cd543f4e09df9e7dce741ca0a292c775ab5 --- /dev/null +++ b/models/thk/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +û +î +r +i +t +e +k +g +b +m +w +u +o +h +y +d +s +' +c +j +p +- +f +l + diff --git a/models/thl/G_100000.pth b/models/thl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..524f4abaaa31c34231a38edfe2520871b5670d05 --- /dev/null +++ b/models/thl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6837246852a1890a5d144774c7591d9ba495745a5448910f43e00c19ee2680f0 +size 145504607 diff --git a/models/thl/config.json b/models/thl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/thl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/thl/vocab.txt b/models/thl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c697a84cf52e2b73fdf3019e41f664009eba19e7 --- /dev/null +++ b/models/thl/vocab.txt @@ -0,0 +1,64 @@ +ृ +क +ड +ऊ +इ +् +ल +ठ +ो +ङ +ञ +व +ि +ऐ +ओ +झ +ँ +ढ +ः +छ +ऋ +ू +ब + +ौ +प +' +ध +ु +ण +थ +र +श +ष +_ +े +फ +ख +ई +त +ऽ +आ +‍ +स +ज +द +उ +म +- +य +ी +ग +ा +घ +ए +औ +ट +भ +ै +ह +अ +न +च +ं diff --git a/models/tih/G_100000.pth b/models/tih/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbba7193f1f251646542930f3841fb01d63a4468 --- /dev/null +++ b/models/tih/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cad6063e4ebdeaf012befe6ef8093197143a2d7a9ca8724e2f9d38b9445771 +size 145479779 diff --git a/models/tih/config.json b/models/tih/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tih/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tih/vocab.txt b/models/tih/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e4cd10cfd441b2fc0cfb0189dad29603835b76f2 --- /dev/null +++ b/models/tih/vocab.txt @@ -0,0 +1,32 @@ +t +u +4 +o +m +r +e +z +l +d +w +y +j +b +a +– +g +_ +n +p +s +k +- +i + +h +' +0 +1 +6 +c +2 diff --git a/models/tik/G_100000.pth b/models/tik/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4495ca1674c486126040026abe7e5d22cf3c822a --- /dev/null +++ b/models/tik/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af287448e661a0df157adc7b8b9f6e0303a5a8e69722a8e664b8a02372378a0 +size 145496055 diff --git a/models/tik/config.json b/models/tik/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tik/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tik/vocab.txt b/models/tik/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..384818bfeba2aaa938c8826f8b73a4d826003149 --- /dev/null +++ b/models/tik/vocab.txt @@ -0,0 +1,53 @@ +| +n +ɛ +i +m +w +y +ɓ +̀ +l +k +s +ʼ +u +a +ì +e +t +à +b +æ +ù +ɔ +o +h +z +c +f +̌ +ǹ +d +ɗ +ŋ +v +ò +g +p +ǎ +è +j +̂ +r +â +- +ǐ +î +ê +ǔ +û +ě +ǒ +ô + diff --git a/models/tir/G_100000.pth b/models/tir/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..dcd27f86a9b467bd5eaca12b96f559806e54c4d8 --- /dev/null +++ b/models/tir/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d344a252a677ec3f127d5bb372edbe202e4f084349b0e449d02813793e4523d1 +size 145476107 diff --git a/models/tir/config.json b/models/tir/config.json new file mode 100644 index 0000000000000000000000000000000000000000..993d1dedb1d0c8e820b98f9e2f019ff166327038 --- /dev/null +++ b/models/tir/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.uroman", + "validation_files": "dev.uroman", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tir/vocab.txt b/models/tir/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..29be0f20b63dddfab221c774186a4475ac3cd7f1 --- /dev/null +++ b/models/tir/vocab.txt @@ -0,0 +1,27 @@ + +- +a +b +c +d +e +f +g +h +i +j +k +l +m +n +o +p +q +r +s +t +u +w +x +y +z diff --git a/models/tkr/G_100000.pth b/models/tkr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a1e208db453ae89a027ee79a1cc32db18de59ef --- /dev/null +++ b/models/tkr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f53a35efd777098d9ceef7e5898fb1d99825466f33aeb6a1a9036de67c5a76 +size 145483749 diff --git a/models/tkr/config.json b/models/tkr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tkr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tkr/vocab.txt b/models/tkr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c0675d44f367dbc172e76a71389e6c33569d56a2 --- /dev/null +++ b/models/tkr/vocab.txt @@ -0,0 +1,37 @@ +| +a +e +n +' +i +ı +y +l +h +s +u +m +k +q +b +r +g +x +ş +d +v +ə +o +t +ç +c +ğ +p +z +– +ü +ö +- +̇ +f + diff --git a/models/tlb/G_100000.pth b/models/tlb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26d355c396ebb99473aba214de7fe6aa2564c184 --- /dev/null +++ b/models/tlb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d46937cabb8ef0970a78792a8c33a326914cd5dc6b258f5fb75b68b0e08bcc +size 145488393 diff --git a/models/tlb/config.json b/models/tlb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tlb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tlb/vocab.txt b/models/tlb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..2664f396df63357419094413b2befdd284cbe22b --- /dev/null +++ b/models/tlb/vocab.txt @@ -0,0 +1,43 @@ +g +- +5 +y +z +a +l +ö +2 +w +n +_ +u +ï +ë +c +o + +d +e +i +6 +m +ü +9 +0 +̄ +v +f +ä +b +p +7 +1 +4 +r +s +3 +j +h +t +' +k diff --git a/models/tlj/G_100000.pth b/models/tlj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12cfe4c5cdaab327cbdfbe31c688a8381d2f5382 --- /dev/null +++ b/models/tlj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae9d3da5e8de05fc4351552783e68e9cc098a608fb32990e9781babf056f481 +size 145478401 diff --git a/models/tlj/config.json b/models/tlj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tlj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tlj/vocab.txt b/models/tlj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..85f0fceaa7221e45708b78818c2d55f32ff6a2c0 --- /dev/null +++ b/models/tlj/vocab.txt @@ -0,0 +1,30 @@ +y +c +w +d + +n +s +- +̱ +k +ʼ +j +_ +a +l +o +t +f +5 +m +' +b +i +h +g +u +p +v +e +6 diff --git a/models/tly/G_100000.pth b/models/tly/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bf93cc6bba83c64cf59475c7e0fde3440b65ffc --- /dev/null +++ b/models/tly/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a854151ff7a7477dce0a305bd8e2cdd5afbb711727fc38aeede141a70c771ac +size 145486060 diff --git a/models/tly/config.json b/models/tly/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tly/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tly/vocab.txt b/models/tly/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..004e24359715b1dbc255c6eb7d4ea65137b89b0f --- /dev/null +++ b/models/tly/vocab.txt @@ -0,0 +1,40 @@ +t +x +a +ğ +m +0 +i +5 +s +q +h +ə +r +v +p + +4 +d +n +y +– +c +z +ş +- +k +_ +b +o +u +e +l +1 +f +6 +2 +j +ı +ç +̇ diff --git a/models/tmc/G_100000.pth b/models/tmc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e9c06bcd7db1fb8446ec0372f11a2e8f5c3c573 --- /dev/null +++ b/models/tmc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fccada8475f0ad3ec3b03bb56170a2bcca5e6a0ccc0b800d3239954b73f0521 +size 145481451 diff --git a/models/tmc/config.json b/models/tmc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tmc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tmc/vocab.txt b/models/tmc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a95dc2c85348fb2336fdfea7ed4b2c7567b87ffc --- /dev/null +++ b/models/tmc/vocab.txt @@ -0,0 +1,34 @@ +n +l +b +o +i +y +ɔ +u +ɨ +- +ɛ +꞊ +ɓ +r +_ + +a +́ +ɗ +t +h +k +j +m +̰ +e +ᵻ +g +' +ḭ +w +d +s +p diff --git a/models/tmf/G_100000.pth b/models/tmf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1f4c1c25da6f1e77c4385e3c3f21a60ae00cd342 --- /dev/null +++ b/models/tmf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d919095dc6509f4c26118f81c5ea05737f3886cf00205e6032a16f374a47a3 +size 145486077 diff --git a/models/tmf/config.json b/models/tmf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tmf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tmf/vocab.txt b/models/tmf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..44be349851069370094f7baa021517c5fe8a0346 --- /dev/null +++ b/models/tmf/vocab.txt @@ -0,0 +1,40 @@ +a +| +k +e +n +m +l +h +o +t +p +i +s +' +g +v +q +r +u +j +— +c +d +b +f +y +0 +1 +z +2 +7 +4 +5 +x +3 +6 +9 +8 +ñ + diff --git a/models/tna/G_100000.pth b/models/tna/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c9f1dcc37f37b732927fd6e8d075b1cce0d936c --- /dev/null +++ b/models/tna/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20e4a26365cccaadb9275bd0365a00f93d25dfb7c8f29d2799f2db5a54659080 +size 145483775 diff --git a/models/tna/config.json b/models/tna/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tna/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tna/vocab.txt b/models/tna/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6b70329240912e7345c020dea6e469922ad7362b --- /dev/null +++ b/models/tna/vocab.txt @@ -0,0 +1,37 @@ +i +h +o +g +m +p +— +z +' +r +s +e +f +y +d +x +ñ + +í +n +k +t +q +_ +u +j +l +á +ó +0 +8 +v +a +b +ú +é +c diff --git a/models/tng/G_100000.pth b/models/tng/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfd94cf72383b089ce36b8d397b6de39843314ae --- /dev/null +++ b/models/tng/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69b3395682aeaf460ceb498528db9ce774ec182da1df5df1d92845bf1edb1463 +size 145482998 diff --git a/models/tng/config.json b/models/tng/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tng/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tng/vocab.txt b/models/tng/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ce58e201459d31b60f2df5703d0c6b5e2e0cf0f9 --- /dev/null +++ b/models/tng/vocab.txt @@ -0,0 +1,36 @@ +u +b +h +û +î +r +a +n +j +s +ÿ +v +g +y +â +ç +c +ô +m +o +ë + +- +_ +ê +d +ū +e +k +é +w +l +i +' +p +t diff --git a/models/tnk/G_100000.pth b/models/tnk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b91e1649fe82a7dd5a291b714fb2f6310882219 --- /dev/null +++ b/models/tnk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e83647bc9a3cfa10917f495e25672ef44b8e7bd690be14f8afa6f3e38913d09 +size 145480845 diff --git a/models/tnk/config.json b/models/tnk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tnk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tnk/vocab.txt b/models/tnk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b95e65f6f3e1c2cdbea160fead19eb6175bf2d18 --- /dev/null +++ b/models/tnk/vocab.txt @@ -0,0 +1,33 @@ +0 +d +n +b +s +r +w +ə +u +a +k +ɨ +ɡ +v +y +l + +m +' +4 +t +3 +_ +j +ǝ +- +e +h +o +f +i +p +g diff --git a/models/tnn/G_100000.pth b/models/tnn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed1ad5e710c61becbec3bf1831925b8556a14e66 --- /dev/null +++ b/models/tnn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77c743e70d46c3f03850e45086482a096e6a03ad98061fa362b997e79ae7a87b +size 145476085 diff --git a/models/tnn/config.json b/models/tnn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tnn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tnn/vocab.txt b/models/tnn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e27839c59d6c19f57e4bc4843f840e00d3413afa --- /dev/null +++ b/models/tnn/vocab.txt @@ -0,0 +1,27 @@ +ə +l +h +v +_ +s +m +o +n +g +i +ɨ +' +b +r +- +e +k +a +t +f +u +w +j + +d +p diff --git a/models/tnp/G_100000.pth b/models/tnp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..238d6aedd4a3744062a97da6b4ed1206db26e73e --- /dev/null +++ b/models/tnp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c858356634d539852aa382b177a89d9b35bb77c6c05573168fafa968ad7fa5a +size 145476869 diff --git a/models/tnp/config.json b/models/tnp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tnp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tnp/vocab.txt b/models/tnp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..02a738b60803652e472ff0c200437b5afaa45473 --- /dev/null +++ b/models/tnp/vocab.txt @@ -0,0 +1,28 @@ +_ +o +d +n +ə +u +e +h +m +r +g +f +ɨ +t +i +b +v +k +s +' +l + +w +a +— +- +j +p diff --git a/models/tnr/G_100000.pth b/models/tnr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7d2b22a9fe0c8017143f15914a49de2eded69d3 --- /dev/null +++ b/models/tnr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9c77da2ea908f705cabb322d18635a9b968f8206d21aa533829143d7f3592e +size 145483772 diff --git a/models/tnr/config.json b/models/tnr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tnr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tnr/vocab.txt b/models/tnr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d514728e50e1d5158b9c7c8301be411bdf63f79e --- /dev/null +++ b/models/tnr/vocab.txt @@ -0,0 +1,37 @@ +ó +h +j +_ +t +a +u +n +é +c +i +ŝ +ü +r +p +m +d +ƴ +s +ñ +g +ë +e +k +– +y +ɓ +o + +f +ɗ +— +ŋ +b +w +- +l diff --git a/models/tnt/G_100000.pth b/models/tnt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e08ebbca3ffad5e227169f433d96f0bf3f769dd2 --- /dev/null +++ b/models/tnt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97e83c36924e351561f9fe97d8fbed0d44f9bfe4f17bc1f0eaec5f5a214743b9 +size 145477609 diff --git a/models/tnt/config.json b/models/tnt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tnt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tnt/vocab.txt b/models/tnt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7777019f4d72db119a50746854bb00a22ca33844 --- /dev/null +++ b/models/tnt/vocab.txt @@ -0,0 +1,29 @@ +' +y +z +w +u +j +t +d +i +f +h +m + +é +_ +p +r +n +k +- +e +c +a +b +o +l +ó +g +s diff --git a/models/tob/G_100000.pth b/models/tob/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ace05870ff3be3403cb42a0d011ed812520e26c4 --- /dev/null +++ b/models/tob/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73757226ce4b47c96982e893a75ed4bb1583d80e0df01c32af044938d7f06de1 +size 145483769 diff --git a/models/tob/config.json b/models/tob/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tob/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tob/vocab.txt b/models/tob/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..43bbd52aef1d159703320a4505f897cd84625435 --- /dev/null +++ b/models/tob/vocab.txt @@ -0,0 +1,37 @@ +m +y +s +' +r +q +j +f +_ +i +a +c +k + +b +ú +á +h +t +í +e +o +x +ó +ñ +p +d +– +l +n +z +v +ỹ +é +g +u +- diff --git a/models/toc/G_100000.pth b/models/toc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b471633ab93cf856fc7f626b63354ecb6573dcea --- /dev/null +++ b/models/toc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfadba19ae016870774cb8e95e00a7c2c7ec9b25f7d140ca9f0faebbaf28c5a4 +size 145483739 diff --git a/models/toc/config.json b/models/toc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/toc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/toc/vocab.txt b/models/toc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bef210f33cc390784f17ae49d1aec2dbf6b6ad01 --- /dev/null +++ b/models/toc/vocab.txt @@ -0,0 +1,37 @@ +a +| +̲ +n +u +i +t +c +l +h +á +x +m +p +s +k +y +o +q +e +r +í +ú +j +z +d +é +b +— +g +ó +f +v +' +ñ +w + diff --git a/models/toh/G_100000.pth b/models/toh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bc0471d8f1ec566c10747050da3b1dab1a11b072 --- /dev/null +++ b/models/toh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10f7c3b090b25947525e655bc8cf51fa5155660058303e67e7b31b0d09d1cff +size 145480035 diff --git a/models/toh/config.json b/models/toh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/toh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/toh/vocab.txt b/models/toh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a29013bad3fc1fb3ceed1c2ee3099e74105b8a2e --- /dev/null +++ b/models/toh/vocab.txt @@ -0,0 +1,32 @@ +| +a +i +u +n +g +e +h +o +l +m +y +d +v +s +k +w +b +t +r +z +p +f +j +' +- +x +9 +– +1 +5 + diff --git a/models/tom/G_100000.pth b/models/tom/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a7c5b97f321c5a76b749f2507a4fc272932427c5 --- /dev/null +++ b/models/tom/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9c4f427810d9d025218a48b64cc4a4a93a09ca55a037f5a20f85b708ed01dfb +size 145476715 diff --git a/models/tom/config.json b/models/tom/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tom/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tom/vocab.txt b/models/tom/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..be902edfd3be491a9bad238da134fbb3fdb0b884 --- /dev/null +++ b/models/tom/vocab.txt @@ -0,0 +1,28 @@ +m +g +y +a +k +p +l +f +' +h +r +c +w +- +t +_ +d +b +é +z +e +i +n + +o +s +u +j diff --git a/models/tos/G_100000.pth b/models/tos/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2000f7580276b5f28f6d9733837bacae9cacaae5 --- /dev/null +++ b/models/tos/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ff0ce91471b4390ed6a8495238c0a6b9bb105d3f00f1917c2a2fe82ebed44e +size 145486053 diff --git a/models/tos/config.json b/models/tos/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tos/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tos/vocab.txt b/models/tos/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f56171027df07554b0eaffeae0947179f26ce33c --- /dev/null +++ b/models/tos/vocab.txt @@ -0,0 +1,40 @@ +a +̱ +| +i +u +n +t +c +h +l +x +m +k +p +á +s +y +o +q +í +j +ú +z +e +- +d +r +ó +b +g +é +f +v +' +ñ +0 +ï +ì +1 + diff --git a/models/tpi/G_100000.pth b/models/tpi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c7d8c3050e01d1ba449416aef72b8e2d45ddecb --- /dev/null +++ b/models/tpi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d88df53007653ea8e44035cb0ed1d00463f302bbcd8367075ac35bc9f49dd0a3 +size 145483765 diff --git a/models/tpi/config.json b/models/tpi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tpi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tpi/vocab.txt b/models/tpi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cdbe3de9ceb75a20339078946bee484ff6a70bea --- /dev/null +++ b/models/tpi/vocab.txt @@ -0,0 +1,37 @@ +' +l +_ +6 +o +e +s +7 +r +f +n +1 +u +4 +q +8 +k +- +i +2 +j +v +h +w +p +0 +9 +t +d +5 +m +y +g +a + +3 +b diff --git a/models/tpm/G_100000.pth b/models/tpm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5d4e201ded98cdd4d7bdaf3494dbe9723a36db58 --- /dev/null +++ b/models/tpm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33f7b408eb92a8b8f149a823dbd1833d69be6a9258c48051c6ba24b000a56bfd +size 145478483 diff --git a/models/tpm/config.json b/models/tpm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tpm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tpm/vocab.txt b/models/tpm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c498a0844868242d866252ab633d5e80f909b42e --- /dev/null +++ b/models/tpm/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +n +u +m +o +k +ɛ +s +t +b +y +l +e +h +w +d +ɔ +r +f +g +c +ŋ +p +z +v +j +' + diff --git a/models/tpp/G_100000.pth b/models/tpp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..80c360bbf24b26ca69a5bff07429da356f53ba2e --- /dev/null +++ b/models/tpp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae8519bdef9b01d62da4c9a8fb1e4efe80187b4affa8134f009e95f508b4774 +size 145489911 diff --git a/models/tpp/config.json b/models/tpp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tpp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tpp/vocab.txt b/models/tpp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6308f160bd0563f351365c2470fe911cc6bef7e5 --- /dev/null +++ b/models/tpp/vocab.txt @@ -0,0 +1,45 @@ +a +| +i +n +t +h +' +l +u +k +c +y +s +x +j +m +p +w +o +e +r +d +b +ú +— +g +é +f +ó +í +v +z +á +q +0 +4 +́ +1 +6 +ñ +` +2 +- +3 + diff --git a/models/tpt/G_100000.pth b/models/tpt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9877a95e0733f4e15f5a0817f25b51787e9f989c --- /dev/null +++ b/models/tpt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28da7015079a586187fdee9300a96c79ff3275367bf7a4782abf5991cf3def64 +size 145486817 diff --git a/models/tpt/config.json b/models/tpt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tpt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tpt/vocab.txt b/models/tpt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f65e8ee77c79e14ff92d57baf880d0b4fec2858c --- /dev/null +++ b/models/tpt/vocab.txt @@ -0,0 +1,41 @@ +a +| +n +i +t +u +k +l +ʼ +h +y +c +s +x +j +o +p +m +q +v +e +r +d +ú +b +— +í +é +g +f +ó +á +z +0 +4 +1 +3 +ñ +2 +- + diff --git a/models/trc/G_100000.pth b/models/trc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c584f4973d51b3dd802a91650862ef79131d1daa --- /dev/null +++ b/models/trc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5174204fb6f3b710706078bcfb8abb6051b57103995d153977e46faaa9af3a0a +size 145486859 diff --git a/models/trc/config.json b/models/trc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/trc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/trc/vocab.txt b/models/trc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b6c6d028eebe1db7053e6d91265d93a42d3e2bab --- /dev/null +++ b/models/trc/vocab.txt @@ -0,0 +1,41 @@ +n +r +o +h +ó +l +á +ô +y +e +d +ú +f +ꞌ +j +â +z +ñ +m +b +‐ +q +í +é +ê +i +x +û +p +ü +s +v +a +_ +̱ + +t +c +k +u +g diff --git a/models/tri/G_100000.pth b/models/tri/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..363b0660e349b228f608376fae679571408d118f --- /dev/null +++ b/models/tri/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1517a816fe0d1569db5ec33c7e0fdc398c7df65a540bc0ade7a538bdf4d62c5 +size 145484557 diff --git a/models/tri/config.json b/models/tri/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tri/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tri/vocab.txt b/models/tri/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a949c65b3b58ce466a9b96ef177dd4a0125bedf3 --- /dev/null +++ b/models/tri/vocab.txt @@ -0,0 +1,38 @@ +| +a +n +e +ë +o +t +i +r +k +m +p +ï +h +j +w +u +s +' +ʻ +- +0 +1 +2 +3 +7 +4 +5 +6 +9 +8 +l +z +y +g +f +ƒ + diff --git a/models/trn/G_100000.pth b/models/trn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c9dda2564e431a1e8ee9002d43b6afaa38706233 --- /dev/null +++ b/models/trn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:309cfbb565527c4caa3cb51a7febcd4da08830dac0805d7806bad278c6960dc2 +size 145483735 diff --git a/models/trn/config.json b/models/trn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/trn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/trn/vocab.txt b/models/trn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..76c68a10c941d9fdecaeceaa0a983cd84be5dd52 --- /dev/null +++ b/models/trn/vocab.txt @@ -0,0 +1,37 @@ +_ +z +ñ +á +x +g +c +e +o +v +y +— +k +i +p +l +f +a +s +ó +u +ú +t +d +1 +n +q + +é +h +j +í +‐ +b +' +m +r diff --git a/models/trs/G_100000.pth b/models/trs/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4baa37606e940d4d8e75b849b3ee90353a612042 --- /dev/null +++ b/models/trs/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8180f949f08cba7510840a287ed000c7fa995e928a7da0ff41b9e8f556c5e5b4 +size 145496065 diff --git a/models/trs/config.json b/models/trs/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/trs/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/trs/vocab.txt b/models/trs/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0ab0cc053766d5e6a786e11df2e59b85438de0a7 --- /dev/null +++ b/models/trs/vocab.txt @@ -0,0 +1,53 @@ +n +p +r +c +ï +' +î +̀ +ó + +ú +- +q +l +k +e +_ +d +h +ë +u +ò +û +ì +z +b +x +à +ù +s +í +o +̱ +v +t +̂ +g +ḯ +è +á +j +ñ +— +́ +m +i +â +y +f +é +ô +a +ê diff --git a/models/tso/G_100000.pth b/models/tso/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2187593e92d48e23232a0305b7287778e3c4bc8 --- /dev/null +++ b/models/tso/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a310137a01f4b729dbf7a556325e8c38f1428001f3a6ced932e02984bb4d10f +size 145478413 diff --git a/models/tso/config.json b/models/tso/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tso/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tso/vocab.txt b/models/tso/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..6f966e02009751d79cd048fff42bbf177f7849a0 --- /dev/null +++ b/models/tso/vocab.txt @@ -0,0 +1,30 @@ +h +z +w +s +' +l +r +m +o +p +v +k +j +g +n +d +i +t +- +f +a +x +b +y +c +_ + +e +u +q diff --git a/models/tsz/G_100000.pth b/models/tsz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ea02ee84273dd1cd9269b95eabf639163bac8cb6 --- /dev/null +++ b/models/tsz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d730c0debd7c786b307d17ddd7626e1fe41d378cde262a0739dd220bbd07c436 +size 145482989 diff --git a/models/tsz/config.json b/models/tsz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tsz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tsz/vocab.txt b/models/tsz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..96165c316a2f1f4e63dd03eda3b6e68857487138 --- /dev/null +++ b/models/tsz/vocab.txt @@ -0,0 +1,36 @@ +a +| +i +n +k +e +u +s +r +t +h +j +m +p +ï +c +á +d +g +o +b +é +í +ó +x +' +ú +l +– +f +î +- +́ +v +` + diff --git a/models/ttc/G_100000.pth b/models/ttc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6509eba8b816198ece091626d3e8c32ffe69e209 --- /dev/null +++ b/models/ttc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa24dcfae08262afdab82a8043909c2535a796c3c55fccc1837672fea8aa73b3 +size 145483737 diff --git a/models/ttc/config.json b/models/ttc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ttc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ttc/vocab.txt b/models/ttc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..429a5b221779c103626551a586b5e2899a899ace --- /dev/null +++ b/models/ttc/vocab.txt @@ -0,0 +1,37 @@ +— +d +n +j +a +r +á +f +e +_ +t +b +í +v +y +k +z +u +x +p +l +s +h +i +- +ú +o +q +m +ñ +g +é +w +c +' +ó + diff --git a/models/tte/G_100000.pth b/models/tte/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3de850b04d9b2465464c769adc63acbc8e129620 --- /dev/null +++ b/models/tte/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7bb219027157169d3dd5b82d44aca207e9fb53fe182d39fcdc91fcf08ed846 +size 145476877 diff --git a/models/tte/config.json b/models/tte/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tte/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tte/vocab.txt b/models/tte/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f7320679680fea2325f5edcec1cb124847e49bd4 --- /dev/null +++ b/models/tte/vocab.txt @@ -0,0 +1,28 @@ +1 +w +6 +y +' +t +- +p +s +m +n +v +b +4 +f +l +_ +k +a +o +3 +g +i +d +u +0 + +e diff --git a/models/ttq-script_tifinagh/G_100000.pth b/models/ttq-script_tifinagh/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfb7b3fd01936e887d89d7c49e6d98c10f80df6d --- /dev/null +++ b/models/ttq-script_tifinagh/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a4c7164c4f6303893c214d52ee1d035d687d01c7ab1b46091efb82ee7953b7 +size 145475333 diff --git a/models/ttq-script_tifinagh/config.json b/models/ttq-script_tifinagh/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ttq-script_tifinagh/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ttq-script_tifinagh/vocab.txt b/models/ttq-script_tifinagh/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9d8d9bb4b08bdca333d077df55085a117fb85428 --- /dev/null +++ b/models/ttq-script_tifinagh/vocab.txt @@ -0,0 +1,26 @@ +| +ⵏ +ⵜ +ⴹ +ⵙ +ⵓ +ⵍ +ⵎ +ⵔ +ⴰ +ⵢ +ⵗ +ⴾ +ⴶ +ⵤ +⵿ +ⴼ +ⵛ +ⵀ +ⵂ +ⵆ +ⵈ +ⵌ +' +ⵑ + diff --git a/models/tue/G_100000.pth b/models/tue/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..792ec31ea05a23e6079d88a9019afa8a6b9f600d --- /dev/null +++ b/models/tue/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f5d1c5a16a213a96fea2d37b440f5bda64b0dc9a8e536a88364c6d734781069 +size 145490693 diff --git a/models/tue/config.json b/models/tue/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tue/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tue/vocab.txt b/models/tue/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bbcd41f38d12313b07113fb97cb4f71f4bb9da22 --- /dev/null +++ b/models/tue/vocab.txt @@ -0,0 +1,46 @@ +| +r +i +a +̶ +e +u +c +ã +t +o +m +ĩ +n +ũ +p +s +d +g +y +j +w +b +õ +ñ +ẽ +é +á +ú +í +ó +́ +ṹ +q +l +h +ṍ +— +f +v +z +- +' +x +k + diff --git a/models/tuf/G_100000.pth b/models/tuf/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9a2c63acbc737214495ef50f7c694a82a251e3e4 --- /dev/null +++ b/models/tuf/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd1d296b7e00f45684b06e87e69e4169332666d9ee275b6f432c197adec23b63 +size 145483767 diff --git a/models/tuf/config.json b/models/tuf/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tuf/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tuf/vocab.txt b/models/tuf/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..836b28befbfaf3f60ae76945bccc1e0987edb31b --- /dev/null +++ b/models/tuf/vocab.txt @@ -0,0 +1,37 @@ +̃ +j +u +z + +y +_ +b +ñ +— +a +x +o +m +f +w +i +r +c +e +h +t +n +k +l +ó +s +v +d +p +- +á +í +ú +g +q +é diff --git a/models/tuk-script_arabic/G_100000.pth b/models/tuk-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..25bd90d392da0ceaf1e385dbd4b52ab3ee1bb03c --- /dev/null +++ b/models/tuk-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:995d3adb476dd41b1f27daad3c11dbfb1ee7658991851e77a0c8cea772f1f0ae +size 145486831 diff --git a/models/tuk-script_arabic/config.json b/models/tuk-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tuk-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tuk-script_arabic/vocab.txt b/models/tuk-script_arabic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a83f638888c9a94ede432074f55835394b3beba1 --- /dev/null +++ b/models/tuk-script_arabic/vocab.txt @@ -0,0 +1,41 @@ +ی +| +ا +ن +ر +ل +د +و +گ +م +س +ب +ه +ت +ق +ز +ش +پ +ک +آ +غ +چ +ع +خ +ج +ح +- +ف +ط +ص +‍ +ض +ظ +ئ +ث +ٰ +ذ +– +' +ژ + diff --git a/models/tuk-script_latin/G_100000.pth b/models/tuk-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5106aa9650344a868a4a0f1fbd395a640a02c6e8 --- /dev/null +++ b/models/tuk-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21878a8a1d53234045863d5891922787bffe3c0234d72cf78bf5e9b492abab1e +size 145483099 diff --git a/models/tuk-script_latin/config.json b/models/tuk-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tuk-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tuk-script_latin/vocab.txt b/models/tuk-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3ad5e7649a4cf3e0383169e058934de06be7babe --- /dev/null +++ b/models/tuk-script_latin/vocab.txt @@ -0,0 +1,36 @@ +ş +ý +b +m +y +w +ž +s +a +g +ü +h +' +z +_ +u +k +i +ň +d +f +o +t +ä +- +j +r +ö +– +n +ç +‐ + +p +l +e diff --git a/models/tuo/G_100000.pth b/models/tuo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..28a5dec46c9c1f5a2fc56fb2c2fd3a30a9acb4fb --- /dev/null +++ b/models/tuo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c349ced0935482c440cc794cc61244991de06cda201db451820616133703d119 +size 145491433 diff --git a/models/tuo/config.json b/models/tuo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tuo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tuo/vocab.txt b/models/tuo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cd6b00c5b643ce88df4b5a823f8f819faf91b8eb --- /dev/null +++ b/models/tuo/vocab.txt @@ -0,0 +1,47 @@ +| +a +e +r +i +ʉ +' +o +ã +c +t +n +s +m +w +u +j +p +ĩ +̃ +ẽ +õ +y +g +b +d +ñ +q +ũ +í +́ +l +ú +é +— +̶ +ó +á +f +h +v +ṍ +z +ṹ +k +x + diff --git a/models/tur/G_100000.pth b/models/tur/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ea807434bfee59f1e908f103c7ab42a3c4cad00 --- /dev/null +++ b/models/tur/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39985ad9b47a603002d2c31c55b9107d76d1eeec26d5bec7020f67d85129ca9 +size 145488374 diff --git a/models/tur/config.json b/models/tur/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tur/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tur/vocab.txt b/models/tur/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..71421ff99fb90d7b7beeb7517f570e66f856c377 --- /dev/null +++ b/models/tur/vocab.txt @@ -0,0 +1,43 @@ +d +h +e +ş +̇ +n +ü +û +î +ı +b +i + +â +_ +f +3 +r +z +c +m +v +p +' +ö +ğ +6 +0 +g +2 +u +a +k +j +ç +4 +l +y +t +o +1 +s +- diff --git a/models/tvw/G_100000.pth b/models/tvw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e3766c5680719b5d7b814cca1045a45bee0bdafe --- /dev/null +++ b/models/tvw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c09393e68e462c24b0048267bbe924d4c9272ad18a3d5c5c53532c3923b2be +size 145477617 diff --git a/models/tvw/config.json b/models/tvw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tvw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tvw/vocab.txt b/models/tvw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..11e6374a21aeddb675146dcd9258073da7ae095c --- /dev/null +++ b/models/tvw/vocab.txt @@ -0,0 +1,29 @@ +d +y +f +o +s +e +r +j +5 +w +l +b +k +i +a +h +c +n +4 +0 +u +_ +' +p +m +g + +t +- diff --git a/models/twb/G_100000.pth b/models/twb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5854ab1196906b5e49e3c7642f04c22f7462b836 --- /dev/null +++ b/models/twb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecb42dc1dc0d2317f85016a920ea36664f302bcafaaa791759067cb19df32620 +size 145479138 diff --git a/models/twb/config.json b/models/twb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/twb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/twb/vocab.txt b/models/twb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..25705916c64c3bc1945bb6d368b1a52bec91d1b3 --- /dev/null +++ b/models/twb/vocab.txt @@ -0,0 +1,31 @@ +6 +i +o +d +3 +u +2 + +- +r +b +y +e +' +s +_ +g +w +1 +a +k +p +4 +t +f +0 +7 +l +v +m +n diff --git a/models/twe/G_100000.pth b/models/twe/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..069bccf15cfaa21fd9a62958a9dbc99416546738 --- /dev/null +++ b/models/twe/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7a3aab9e853c065c6a0260c343c23649ba78c36b0ab28d3a3eb2ff0d69357dc +size 145477609 diff --git a/models/twe/config.json b/models/twe/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/twe/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/twe/vocab.txt b/models/twe/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1ea3f5163ed4aaa8d0d2edd08e085db599591b2e --- /dev/null +++ b/models/twe/vocab.txt @@ -0,0 +1,29 @@ +i +o +' +a +b +j +n +v + +g +h +f +x +y +u +e +m +- +w +k +p +q +_ +l +r +t +s +c +d diff --git a/models/twu/G_100000.pth b/models/twu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ab0b833b0b767ac2dd0ca20d4b3b987dcc16a328 --- /dev/null +++ b/models/twu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565f3b167b021230fe9383fe31fcbbf26b4310158a8ecb64bf88d955ce11b456 +size 145477073 diff --git a/models/twu/config.json b/models/twu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/twu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/twu/vocab.txt b/models/twu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..02be69fbee1933ef67552754a7d4704456d6a7aa --- /dev/null +++ b/models/twu/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +e +o +l +i +u +m +s +t +k +d +h +b +' +f +- +g +p +y +r +z +w +j +c +9 + diff --git a/models/txa/G_100000.pth b/models/txa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..65d78cbc4a01221e17c550ec7fc1774614d1d98d --- /dev/null +++ b/models/txa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29aa62f8108525deec7a0baa7ffe49e859e9261beb2d2cdb1c2fbb3ccf2be84b +size 145473017 diff --git a/models/txa/config.json b/models/txa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/txa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/txa/vocab.txt b/models/txa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bfaf80ddcd08d4803d1f98da263fc9be951d7522 --- /dev/null +++ b/models/txa/vocab.txt @@ -0,0 +1,23 @@ +y +' +m +h + +j +a +k +o +u +g +l +r +p +i +d +_ +- +n +s +b +w +t diff --git a/models/txq/G_100000.pth b/models/txq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..69c48e404b97556caec7dda355e14cc4f1f79bf5 --- /dev/null +++ b/models/txq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cea6974880de11eee9d5bd18fa9114a57d0c147f514f78367f39bddceb9d4876 +size 145485291 diff --git a/models/txq/config.json b/models/txq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/txq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/txq/vocab.txt b/models/txq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..50ac23f6f38d59b9684bf92725e53e1c8275f171 --- /dev/null +++ b/models/txq/vocab.txt @@ -0,0 +1,39 @@ +y +r +ꞌ + +2 +5 +t +1 +g +a +4 +b +3 +0 +f +d +' +8 +o +m +k +s +- +w +_ +e +p +i +— +h +9 +6 +n +z +l +u +c +7 +j diff --git a/models/txu/G_100000.pth b/models/txu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..76dd4673c39d01395f87fab938e095a16de9d502 --- /dev/null +++ b/models/txu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7797dacf6b58858e28c62eccb521b8c9aaf183c9751ef4a2c0ded56b4e6b81 +size 145488463 diff --git a/models/txu/config.json b/models/txu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/txu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/txu/vocab.txt b/models/txu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c0b2ede2d45afb8e2c45c6260139b0d5339a9442 --- /dev/null +++ b/models/txu/vocab.txt @@ -0,0 +1,43 @@ +õ +' +2 +j +u +0 +ã +1 +3 +ĩ +ũ +9 +ê +ỳ +7 +8 +ô +m +a +6 +k +ẽ +— +w +o +4 +g +i +x +t +y +ỹ +e +5 +r +n +d + +h +à +p +b +_ diff --git a/models/tye/G_100000.pth b/models/tye/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d9283ec3eed731c4d65f96d5f110db0dbe528b5 --- /dev/null +++ b/models/tye/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8728981dbbca090ca1c468bbf9dc02be9a8e8f8039cd8ac0576d0aedf3071174 +size 145492221 diff --git a/models/tye/config.json b/models/tye/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tye/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tye/vocab.txt b/models/tye/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..02fad2c3fe11a76f70063f4ac68b94701c4a8dc0 --- /dev/null +++ b/models/tye/vocab.txt @@ -0,0 +1,48 @@ +r +l +_ +f +s +ḿ +í +v +ì +á +à +d +ã +ò +h +́ +ń +ó + +i +ǹ +b +o +j +ũ +̀ +z +è +g +w +u +c +é +m +ĩ +n +k +t +e +' +y +ɔ +ù +a +ɛ +̃ +p +ú diff --git "a/models/tzh-dialect_bachaj\303\263n/G_100000.pth" "b/models/tzh-dialect_bachaj\303\263n/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..de1faf6445dc7b7e3d0cb8bbaf02f746991b0d0d --- /dev/null +++ "b/models/tzh-dialect_bachaj\303\263n/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2de3e8db01fe59b29c0e0fa28f6615efebdf8f9a5156cb2b57dde33b013ce1dc +size 145483001 diff --git "a/models/tzh-dialect_bachaj\303\263n/config.json" "b/models/tzh-dialect_bachaj\303\263n/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/tzh-dialect_bachaj\303\263n/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/tzh-dialect_bachaj\303\263n/vocab.txt" "b/models/tzh-dialect_bachaj\303\263n/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..b014551343f651c7556a396224bbda408f709c91 --- /dev/null +++ "b/models/tzh-dialect_bachaj\303\263n/vocab.txt" @@ -0,0 +1,36 @@ +| +a +t +c +e +i +l +' +u +h +o +n +s +y +b +m +j +x +p +w +r +q +d +- +ú +g +í +é +ó +f +á +z +v +ñ +6 + diff --git a/models/tzh-dialect_tenejapa/G_100000.pth b/models/tzh-dialect_tenejapa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5c45a2608ce06a0fec886f93a2c1c07779b7be8 --- /dev/null +++ b/models/tzh-dialect_tenejapa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f764f7f5176ed0fd23fbba66112628d1f3e53b183e64ab53ada697916bbe35 +size 145483735 diff --git a/models/tzh-dialect_tenejapa/config.json b/models/tzh-dialect_tenejapa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tzh-dialect_tenejapa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tzh-dialect_tenejapa/vocab.txt b/models/tzh-dialect_tenejapa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..37ef3c212abe8e21cd411503bb261cfa452225dd --- /dev/null +++ b/models/tzh-dialect_tenejapa/vocab.txt @@ -0,0 +1,37 @@ +| +a +e +t +i +c +l +' +u +o +y +s +n +j +b +m +x +h +q +p +w +r +d +— +- +g +ú +ó +f +é +í +z +v +ñ +á +k + diff --git a/models/tzj-dialect_eastern/G_100000.pth b/models/tzj-dialect_eastern/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6e2edebcc080a986e54845a982f6b5e1db412fe --- /dev/null +++ b/models/tzj-dialect_eastern/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01ac18f13ded91a7089d4d847353a0bcd9791ff5c1ee128d8c4ab37115f4abb8 +size 145483613 diff --git a/models/tzj-dialect_eastern/config.json b/models/tzj-dialect_eastern/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tzj-dialect_eastern/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tzj-dialect_eastern/vocab.txt b/models/tzj-dialect_eastern/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..8518c33301522b7f72b221a021e856b82ae33165 --- /dev/null +++ b/models/tzj-dialect_eastern/vocab.txt @@ -0,0 +1,37 @@ +a +x +n +i +o +p +í +d + +_ +r +e +é +— +ú +' +m +l +j +b +y +z +u +ñ +c +k +h +w +s +t +- +v +á +f +q +g +ó diff --git a/models/tzj-dialect_western/G_100000.pth b/models/tzj-dialect_western/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8930e8e68f2c57a221740f555dcd464e38322376 --- /dev/null +++ b/models/tzj-dialect_western/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c51a38a5e39cc3ff8608563b530c3099067c06b9362786fc19a4c7bb14f4ad +size 145483757 diff --git a/models/tzj-dialect_western/config.json b/models/tzj-dialect_western/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tzj-dialect_western/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tzj-dialect_western/vocab.txt b/models/tzj-dialect_western/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5bf3152adf46dc4c41a31689182e0768339c07ad --- /dev/null +++ b/models/tzj-dialect_western/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +j +n +c +' +e +o +r +t +u +h +l +k +m +x +w +q +b +s +p +z +y +d +g +— +í +ú +á +f +ó +é +v +̱ +ñ + diff --git a/models/tzo-dialect_chamula/G_100000.pth b/models/tzo-dialect_chamula/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0cf0a7f41e2178d75f19b8fd4c7873d9cd440d87 --- /dev/null +++ b/models/tzo-dialect_chamula/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38961164cce2f0a565479799c0fb5558b135bc99d9555012c43914248fbc70c +size 145483003 diff --git a/models/tzo-dialect_chamula/config.json b/models/tzo-dialect_chamula/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/tzo-dialect_chamula/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/tzo-dialect_chamula/vocab.txt b/models/tzo-dialect_chamula/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..092898ac499c79f384fab842bf30892d42c67868 --- /dev/null +++ b/models/tzo-dialect_chamula/vocab.txt @@ -0,0 +1,36 @@ +| +a +t +i +c +l +u +' +e +o +j +n +s +x +b +y +h +m +v +q +p +r +d +— +- +í +g +f +á +ú +z +é +ó +k +ñ + diff --git "a/models/tzo-dialect_chenalh\303\263/G_100000.pth" "b/models/tzo-dialect_chenalh\303\263/G_100000.pth" new file mode 100644 index 0000000000000000000000000000000000000000..b80e3c2809a376fda541ecc6fcaad080fe8097d7 --- /dev/null +++ "b/models/tzo-dialect_chenalh\303\263/G_100000.pth" @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c0fd3bcb9880f446ad8132b7f73b17ffacf9ba4cb523db0bd9f458ef3d0193d +size 145483015 diff --git "a/models/tzo-dialect_chenalh\303\263/config.json" "b/models/tzo-dialect_chenalh\303\263/config.json" new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ "b/models/tzo-dialect_chenalh\303\263/config.json" @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git "a/models/tzo-dialect_chenalh\303\263/vocab.txt" "b/models/tzo-dialect_chenalh\303\263/vocab.txt" new file mode 100755 index 0000000000000000000000000000000000000000..82f25f860553ad505b17e3c2ae64b3fddbef2cbf --- /dev/null +++ "b/models/tzo-dialect_chenalh\303\263/vocab.txt" @@ -0,0 +1,36 @@ +| +a +c +i +l +t +' +u +o +e +n +s +j +h +b +y +x +m +v +q +p +r +d +— +g +z +- +í +f +ó +é +ú +á +k +ñ + diff --git a/models/ubl/G_100000.pth b/models/ubl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b5d3da9d4811d7cc7d4ab3db49c17abfef4b143 --- /dev/null +++ b/models/ubl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:920172f40b283870ed79488a8f8048993cc16d7436efa5896a22976e954f16f2 +size 145483777 diff --git a/models/ubl/config.json b/models/ubl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ubl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ubl/vocab.txt b/models/ubl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b6a62609d3921d5544c8b2431f79e8edf3d765b4 --- /dev/null +++ b/models/ubl/vocab.txt @@ -0,0 +1,37 @@ +j +a +_ +' +k +e +w +i +f +u +o +d +n +l +5 +h + +- +v +4 +6 +p +t +3 +g +c +0 +b +m +2 +s +z +x +q +r +1 +y diff --git a/models/ubu/G_100000.pth b/models/ubu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..11994b33210aca03ee251a48121dda9d4b20b5b1 --- /dev/null +++ b/models/ubu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6617c74404009ba17c84e09bfa78efcef32f639485ab1ae42caf69ab5f081637 +size 145476848 diff --git a/models/ubu/config.json b/models/ubu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ubu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ubu/vocab.txt b/models/ubu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e4113b0c1fc5b5ad6e6bfb51cd2aad237063c351 --- /dev/null +++ b/models/ubu/vocab.txt @@ -0,0 +1,28 @@ +r + +i +' +o +ú +n +t +p +s +e +ó +í +m +w +a +u +_ +á +b +g +j +l +y +- +k +d +é diff --git a/models/udm/G_100000.pth b/models/udm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..38694b645b31810631a28455b9ee338cb3a4e793 --- /dev/null +++ b/models/udm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee4712269e73f624cdc0852a4533101a557238afa4628c08c8a2670ac7e9d0e9 +size 145487621 diff --git a/models/udm/config.json b/models/udm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/udm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/udm/vocab.txt b/models/udm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ebc6210582c0b943c637a29e913a96b4b779820b --- /dev/null +++ b/models/udm/vocab.txt @@ -0,0 +1,42 @@ +в +я +ж +ӥ +й +ш +ӟ +ӵ +у +б +и +ӝ +- +о +щ +л +п +м +ю +ц +т +– +д +а +г +э +к +е +р +ё +ч + +ы +ь +ӧ +с +н +_ +ф +ъ +з +х diff --git a/models/udu/G_100000.pth b/models/udu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1184e3a831d2ffd2d13bf104f8f53690a0c68f43 --- /dev/null +++ b/models/udu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b147a4b79209ab222e50378322370b314d95e291da75b59d1fce6a72519730a +size 145481439 diff --git a/models/udu/config.json b/models/udu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/udu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/udu/vocab.txt b/models/udu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..05dea52d9bedea017ca8b5bd0d1fb444686002fb --- /dev/null +++ b/models/udu/vocab.txt @@ -0,0 +1,34 @@ +| +i +a +m +o +n +k +u +d +h +e +s +' +t +l +w +g +b +y +r +p +j +̱ +c +ḵ +ṯ +ŋ +ẖ +— +– +̧ +f +- + diff --git a/models/uig-script_arabic/G_100000.pth b/models/uig-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfa54897a07c9cfe32dcf598b24083eb0746c9bb --- /dev/null +++ b/models/uig-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b277eec101a058c18a38c1cf487d7bb19434cbc9e0001904c446c4296dbe7e3 +size 145486832 diff --git a/models/uig-script_arabic/config.json b/models/uig-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/uig-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/uig-script_arabic/vocab.txt b/models/uig-script_arabic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7b15865a1637b343cd9be648a9ce35554cec8d31 --- /dev/null +++ b/models/uig-script_arabic/vocab.txt @@ -0,0 +1,41 @@ +ى +| +ا +ە +ل +ن +ر +ۇ +د +ئ +ت +ق +م +ي +ب +س +ك +ش +ڭ +غ +پ +ې +ز +ۈ +و +ھ +چ +گ +خ +ۋ +ۆ +- +ج +ف +0 +1 +2 +6 +5 +4 + diff --git a/models/uig-script_cyrillic/G_100000.pth b/models/uig-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..63449b9143cf646b4b0bf5608e25d1209fca275e --- /dev/null +++ b/models/uig-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d25ecf8282c569df2a22e49a0065290738ac295ca4c8b9d84d2c8794f093c3c5 +size 145492197 diff --git a/models/uig-script_cyrillic/config.json b/models/uig-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/uig-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/uig-script_cyrillic/vocab.txt b/models/uig-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..107ca6d58ffc5db649e9cda9913d2c03120e9bf6 --- /dev/null +++ b/models/uig-script_cyrillic/vocab.txt @@ -0,0 +1,48 @@ +ю +һ +­ +ә +х +_ +ь +й +е +м +г +ё +ч +б +қ +л +с +к +я +ф +т +о +4 +д +ж +ө +н +ш +ц +р +- +1 +з +2 +ң +0 +җ +у +ъ +а +ы + +в +п +и +— +ғ +ү diff --git a/models/ukr/G_100000.pth b/models/ukr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..26011f90945f5995da6c6cc62ca63c1e24a8ea8c --- /dev/null +++ b/models/ukr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe8e11e3ab840dd6f860e01cab1d4b5c98d405d8d0b381210d1c0d2887d0b810 +size 145485271 diff --git a/models/ukr/config.json b/models/ukr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ukr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ukr/vocab.txt b/models/ukr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0b49606d5480c11550d993794040f702dc9f8fd4 --- /dev/null +++ b/models/ukr/vocab.txt @@ -0,0 +1,39 @@ +| +о +а +и +в +н +і +т +с +е +р +д +л +у +м +п +к +б +я +з +г +й +ь +х +ч +ж +щ +ю +ш +ї +є +ц +— +' +ф +‐ +ґ +- + diff --git a/models/unr/G_100000.pth b/models/unr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cd2e2031e4e76798992cebf921dea67c457b356e --- /dev/null +++ b/models/unr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:254e45e7465210a02dfefd40f9e023027dad09b8e25a82b82424af9fcd8730c0 +size 145496819 diff --git a/models/unr/config.json b/models/unr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/unr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/unr/vocab.txt b/models/unr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b726d451203122788a374280dde4d2702ee3e9f0 --- /dev/null +++ b/models/unr/vocab.txt @@ -0,0 +1,54 @@ +ହ +ଭ +ଙ +ା +ଜ +଼ +ଘ +ଏ +ଁ +ଫ +' +ୟ +କ +ଇ +ବ +ଖ +_ +ମ +ୁ +ଲ +ଚ + +ଗ +ି +ଉ +ଠ +ଦ +ସ +ଣ +ଷ +ଧ +ଶ +ତ +‍ +ୋ +ୱ +ଥ +୍ +ଂ +େ +ଛ +ଆ +ପ +ଞ +ଡ +ଅ +ଝ +ଟ +ଢ +ନ +ଯ +ୀ +ର +ଃ diff --git a/models/upv/G_100000.pth b/models/upv/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..80214b6a5f01346ede0e48ff360fd46b68b140bf --- /dev/null +++ b/models/upv/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591e15aef81cf2e09e3dc27d22b7d51a6b8241d4a6d9ce546914a709afda0ab8 +size 145479157 diff --git a/models/upv/config.json b/models/upv/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/upv/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/upv/vocab.txt b/models/upv/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..05b8ea6497a7f78f6c7791b88de952cdde86c85d --- /dev/null +++ b/models/upv/vocab.txt @@ -0,0 +1,31 @@ +ö +w +d +b +_ +p +l +r +' +2 +u +- +̃ +j +s +e + +o +g +v +6 +t +n +4 +a +0 +i +1 +m +k +5 diff --git a/models/ura/G_100000.pth b/models/ura/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..df332f092a7cbbf56dd515a9b687b4a0a13ba31b --- /dev/null +++ b/models/ura/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cd096963c4f90f92770daefd97c5254bd738a376ff0440adc3decfa7b176b82 +size 145475319 diff --git a/models/ura/config.json b/models/ura/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ura/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ura/vocab.txt b/models/ura/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ea2ecb6020c5c412e9d5e8be88a7de0982d61355 --- /dev/null +++ b/models/ura/vocab.txt @@ -0,0 +1,26 @@ +a +| +n +e +i +c +u +o +r +h +t +j +s +l +b +q +m +f +d +- +p +y +g +v +ñ + diff --git a/models/urb/G_100000.pth b/models/urb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6f3ec8b7fdfd9e96c16f54d3b9a5cc7824cf4f09 --- /dev/null +++ b/models/urb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5162efbeb6b73842ba9958376e224210750a4752c1f5ebcbba81780acc7c4f0e +size 145489907 diff --git a/models/urb/config.json b/models/urb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urb/vocab.txt b/models/urb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..afb80d499cf75d57613246ca46ce7db1f39713bc --- /dev/null +++ b/models/urb/vocab.txt @@ -0,0 +1,45 @@ +— +w +b +x +é +5 +n +g +t +ỹ +e +ý +' +8 +4 +o +ã +_ +h +2 +7 +j +õ +r +i +u +ú +m + +9 +d +k +a +ó +á +3 +0 +p +1 +y +ĩ +ũ +ẽ +6 +s diff --git a/models/urd-script_arabic/G_100000.pth b/models/urd-script_arabic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cb455e8469bcad23cab679b8d906155f9b6b4505 --- /dev/null +++ b/models/urd-script_arabic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12447cd78e1322a1cae15b346efca96b3e903a28987883f74ca439413114dde0 +size 145499855 diff --git a/models/urd-script_arabic/config.json b/models/urd-script_arabic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urd-script_arabic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urd-script_arabic/vocab.txt b/models/urd-script_arabic/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..f5179139fdcf90030409c2675f01e0c44b81eb3d --- /dev/null +++ b/models/urd-script_arabic/vocab.txt @@ -0,0 +1,58 @@ +د +خ +آ +ؤ +' +چ +ط +8 +5 +ن +و +ے +أ +ل +گ +ا +ث +ق +ی +ص +3 +ڑ +ر +9 +2 +ئ + +7 +ت +ف +_ +ب +ظ +— +ش +4 +1 +غ +ع +ٰ +م +ھ +ض +ژ +ز +ک +ٹ +ذ +0 +ج +ۂ +6 +پ +ہ +ح +ڈ +ں +س diff --git a/models/urd-script_devanagari/G_100000.pth b/models/urd-script_devanagari/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..46a71b46777668ffec7ac94f6f5af53656cc8b23 --- /dev/null +++ b/models/urd-script_devanagari/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa904a6dfa0e2b48191cdc576dbf1cd1feb113755dc6d7284b1ef9f796ec8fc2 +size 145507585 diff --git a/models/urd-script_devanagari/config.json b/models/urd-script_devanagari/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urd-script_devanagari/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urd-script_devanagari/vocab.txt b/models/urd-script_devanagari/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e94190e2d98541a598512002c917701ec08a9ebe --- /dev/null +++ b/models/urd-script_devanagari/vocab.txt @@ -0,0 +1,68 @@ +| +ा +क +े +र +ह +न +स +म +ी +त +ि +़ +ल +् +ो +ं +द +ज +ब +ु +य +प +ै +ग +व +ू +उ +ख +अ +आ +ए +ँ +इ +फ +श +औ +च +ई +थ +भ +- +ड +ौ +छ +झ +ट +ठ +ऐ +ओ +घ +ध +ऊ +ढ +0 +' +2 +1 +4 +5 +— +9 +ः +3 +7 +8 +6 + diff --git a/models/urd-script_latin/G_100000.pth b/models/urd-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8518e9bdc2e30235a2337ad8a25bf1bfac395024 --- /dev/null +++ b/models/urd-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0bc8c2ddd5a0c3bedb1f48b33e9d86ad41fb5109ce0aa63e825718342fb9f1d +size 145491341 diff --git a/models/urd-script_latin/config.json b/models/urd-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urd-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urd-script_latin/vocab.txt b/models/urd-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..076006f6a6f96cc0c8e6b6933631673ced5b1cf5 --- /dev/null +++ b/models/urd-script_latin/vocab.txt @@ -0,0 +1,47 @@ +| +a +h +ā +e +k +i +r +s +u +n +ī +m +t +ṅ +l +o +d +b +p +y +j +g +ū +w +z +c +q +ḳ +f +- +ṛ +ṭ +ġ +ḍ +' +0 +4 +2 +1 +5 +9 +3 +7 +8 +6 + diff --git a/models/urk/G_100000.pth b/models/urk/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4fa5736f3975478377a0914aa7019618dfaaf62a --- /dev/null +++ b/models/urk/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725e86c8a87d61f07117b433772930500cd84521b59f048783eca17be03835c1 +size 145493766 diff --git a/models/urk/config.json b/models/urk/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urk/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urk/vocab.txt b/models/urk/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4c30ad38c7b5772beb43b330df6797bb64a30f35 --- /dev/null +++ b/models/urk/vocab.txt @@ -0,0 +1,50 @@ +| +า +อ +ู +ะ +ี +ก +ั +เ +น +ด +ื +ต +ฮ +ซ +บ +ล +ม +ร +ญ +จ +โ +ป +ย +ฆ +ฌ +ง +ว +ิ +- +แ +ํ +ค +' +ุ +ึ +ฟ +ท +พ +ช +3 +0 +4 +6 +1 +` +2 +q +t + diff --git a/models/urt/G_100000.pth b/models/urt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2e251a1d5db2e6f113741bd5f896423b3f9bc98 --- /dev/null +++ b/models/urt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:481b1a675dc3bbc3f5909dd6f40e61bcf7c16307d889cd79c006e1c316ce0400 +size 145485285 diff --git a/models/urt/config.json b/models/urt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/urt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/urt/vocab.txt b/models/urt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e5c8bb565c2e37eacd02dfeb6be69a88db0807e0 --- /dev/null +++ b/models/urt/vocab.txt @@ -0,0 +1,39 @@ +4 +8 +' + +ŋ +e +m +6 +r +g +d +5 +y +n +v +2 +h +t +_ +9 +i +u +- +b +3 +o +1 +a +ꞌ +p +f +s +j +k +w +7 +c +0 +l diff --git a/models/ury/G_100000.pth b/models/ury/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ca4fe2464f4b595867692096cf18c21bee746a37 --- /dev/null +++ b/models/ury/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:098e18ec090d2421589f760cb571c9ba5b794a651626e90d61b61b003d33b2c8 +size 145487594 diff --git a/models/ury/config.json b/models/ury/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ury/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ury/vocab.txt b/models/ury/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..218f708b18d24257c6ca9004b0febc9acf70715e --- /dev/null +++ b/models/ury/vocab.txt @@ -0,0 +1,42 @@ +- +8 +ï +9 +z +s +1 +d +ë +3 +w +b +– +r +o +i +l +' +u +6 +5 +n +h +y +m +a +ꞌ +0 + +g +f +k +2 +7 +t +e +— +4 +_ +c +p +j diff --git a/models/usp/G_100000.pth b/models/usp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b62b90b91c9c52f1a0c1e0d0b4b2270e7b5600b3 --- /dev/null +++ b/models/usp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f22ffd063559c5316b69cc2e3ab7da25f863bdf3581406a3ad20e2e2f227108 +size 145482211 diff --git a/models/usp/config.json b/models/usp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/usp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/usp/vocab.txt b/models/usp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1b8913191b13d5c1a4eb655c5909e0c54e366b70 --- /dev/null +++ b/models/usp/vocab.txt @@ -0,0 +1,35 @@ +| +a +i +j +' +k +c +t +r +l +n +e +o +h +s +u +w +b +m +x +y +p +d +z +q +ú +é +g +í +f +ó +v +á +ñ + diff --git a/models/uzb-script_cyrillic/G_100000.pth b/models/uzb-script_cyrillic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..33099b09657f9b71390acf6fc4af11a0d21c109f --- /dev/null +++ b/models/uzb-script_cyrillic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee797177d9e2dc2dc7719931ac2338a380ec3cfc9c46690794fceb090aedaa1 +size 145489921 diff --git a/models/uzb-script_cyrillic/config.json b/models/uzb-script_cyrillic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/uzb-script_cyrillic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/uzb-script_cyrillic/vocab.txt b/models/uzb-script_cyrillic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b7615016909b9cd783448b1027d16e92a1e155b2 --- /dev/null +++ b/models/uzb-script_cyrillic/vocab.txt @@ -0,0 +1,45 @@ +| +а +и +н +л +р +д +о +г +б +у +м +т +с +қ +к +з +ў +ш +е +й +ҳ +ч +х +в +э +ё +я +п +ғ +ж +ф +– +— +ю +ъ +0 +1 +2 +4 +6 +ь +3 +5 + diff --git a/models/vag/G_100000.pth b/models/vag/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..872df36ed7ae7749624803de1c32c295a910c237 --- /dev/null +++ b/models/vag/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04af287cd1ed1ce99edfb9500bc7cc491dddff93e7153b8fd0097660504ebe11 +size 145487575 diff --git a/models/vag/config.json b/models/vag/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vag/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vag/vocab.txt b/models/vag/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..45d08f766e65428374e158c71dc6e0ba8a8b2e43 --- /dev/null +++ b/models/vag/vocab.txt @@ -0,0 +1,42 @@ +| +a +i +n +u +ɛ +ᴐ +l +r +b +d +ŋ +w +e +k +g +m +h +z +s +y +t +p +o +f +c +j +í +á +ú +v +à +ń +́ +ì +' +ɔ +é +ó +2 +3 + diff --git a/models/vid/G_100000.pth b/models/vid/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7d094b27cc73df6bc7110f11b7a8401197aed2ed --- /dev/null +++ b/models/vid/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84814595dcc906c2b1c3b3a33f3a5fd4e0b735fcd9b1df6212d9af34c5473b12 +size 145475323 diff --git a/models/vid/config.json b/models/vid/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vid/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vid/vocab.txt b/models/vid/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fd101818347bb8666873bf7f767654197f6ac4f2 --- /dev/null +++ b/models/vid/vocab.txt @@ -0,0 +1,26 @@ + +m +z +e +_ +t +g +a +w +c +n +f +d +j +i +k +u +v +s +l +p +b +o +' +h +y diff --git a/models/vie/G_100000.pth b/models/vie/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f5c2177f7c5c651bbccc9ee00f658d6deedc96dd --- /dev/null +++ b/models/vie/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86da8a73c02690dfeec18b6e9f766f710a755b27d78e20a9bfac871018f94526 +size 145528297 diff --git a/models/vie/config.json b/models/vie/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vie/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vie/vocab.txt b/models/vie/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..d441eea3c45419425c6c4e3275b45f0e734be22e --- /dev/null +++ b/models/vie/vocab.txt @@ -0,0 +1,95 @@ +ụ +x +s +è +ì +ặ +ứ +ơ +u +ư +ằ +ỳ +â +c +v +ệ +ồ +_ +ỵ +ộ +ẻ +g +ạ +ĩ +ả +õ +ỡ +ẳ +ô +a +i +o +ừ +ỹ +ổ +à +ờ +ý +ù +– +ề +ễ +ẵ +ỏ +' +ò +ố +q +ọ +ẩ +ự +ã +2 +ị +e +đ +ó +ầ +é +ớ +ế +ủ +ử +d +ữ +ắ +ẽ +ẫ +m +ể +ũ +ỉ +ẹ +ỗ +í +y +ú +á +p +k +t +ấ +l +ở + +h +ỷ +ậ +b +ă +n +ê +r +ợ +- diff --git a/models/vif/G_100000.pth b/models/vif/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a946521425c8153f3dafd65cfe361c1f06eda3e --- /dev/null +++ b/models/vif/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8b66f61423a223469c7feb6571e9310b0431fec18a3d89a2c6e631795fe482f +size 145478379 diff --git a/models/vif/config.json b/models/vif/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vif/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vif/vocab.txt b/models/vif/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f6d7c85815e31c8ea0bafffbc8186854d1c4507a --- /dev/null +++ b/models/vif/vocab.txt @@ -0,0 +1,30 @@ +v +— +a +o +d +c +_ +b +- +g +‐ +n +k +e +p +t +f +i +ê +ë + +u +m +w +y +s +l +j +z +' diff --git a/models/vmw/G_100000.pth b/models/vmw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f98e2d8632cfe7ed006fb46bc7ef46c1da0f593f --- /dev/null +++ b/models/vmw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364bf033c5b5be05c18eae9b7a931e38a02ff936a8d96e44f4a15c7e422b7d16 +size 145477721 diff --git a/models/vmw/config.json b/models/vmw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vmw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vmw/vocab.txt b/models/vmw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..13bb2eb02c5b3d8659a46a53c95168e0580e7f2a --- /dev/null +++ b/models/vmw/vocab.txt @@ -0,0 +1,29 @@ +t +f +h +c +x +- +m +i +' +b +u +e +r +d +k +y +w +p +l +g +n +s +v + +a +o +j +_ +2 diff --git a/models/vmy/G_100000.pth b/models/vmy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..285716ec341a9f73229d1f03f5b2dca6b5fd1d45 --- /dev/null +++ b/models/vmy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6602e33519d8e54fa365b4d8581870fd32b7b98dcc1e837b42246d681852f26 +size 145482241 diff --git a/models/vmy/config.json b/models/vmy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vmy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vmy/vocab.txt b/models/vmy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bb543d8a2fd9b616d4dc947231d04fdeec1c2838 --- /dev/null +++ b/models/vmy/vocab.txt @@ -0,0 +1,35 @@ +l +x +i +í +u +b +k +é +v +ñ +n +q +r +f +c +o +e +p +t +m +_ +s +á +d +g +' +ó +a +z +h +ú + +̱ +j +y diff --git a/models/vun/G_100000.pth b/models/vun/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e9aab490e1cb785c20253ff0c74226d6f14913a1 --- /dev/null +++ b/models/vun/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b74cc0c867850bd02f0f39902a26fa72869cf4bb5cd389cf08ff4cf0044b4c56 +size 145478405 diff --git a/models/vun/config.json b/models/vun/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vun/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vun/vocab.txt b/models/vun/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..97a9411adc999c48f4478ec3be0296f7d4fe7eee --- /dev/null +++ b/models/vun/vocab.txt @@ -0,0 +1,30 @@ +| +a +i +y +n +o +u +k +e +w +m +l +s +h +g +d +r +ṙ +c +f +ṟ +t +p +b +- +' +j +z +v + diff --git a/models/vut/G_100000.pth b/models/vut/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b6307df172535b2b45ceb92bece899d2459a971 --- /dev/null +++ b/models/vut/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:224393cb0b4f73e42fa3a59e96702ad7a0322c1b927e7871728fd4f004d7a1b9 +size 145494507 diff --git a/models/vut/config.json b/models/vut/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/vut/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/vut/vocab.txt b/models/vut/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f2c05c19e6796722dc266ae2641f9df4b8584dc3 --- /dev/null +++ b/models/vut/vocab.txt @@ -0,0 +1,51 @@ +w + +̧ +ȩ +́ +b +ê +ɨ +s +y +à +ô +ə +c +è +o +d +_ +ɗ +k +v +p +l +t +ɓ +é +á +̀ +m +g +ì +n +h +j +u +i +û +r +í +î +̂ +ó +a +ú +ò +ŋ +ù +ɔ +â +e +f diff --git a/models/wal-script_ethiopic/G_100000.pth b/models/wal-script_ethiopic/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..57956dd33184f62c71322b476eb9d9a2ef1183a7 --- /dev/null +++ b/models/wal-script_ethiopic/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00409b6d252102a093838a13ad16de0a87b82b1b026ae666a7540be6cf380695 +size 145592143 diff --git a/models/wal-script_ethiopic/config.json b/models/wal-script_ethiopic/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wal-script_ethiopic/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wal-script_ethiopic/vocab.txt b/models/wal-script_ethiopic/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..79f8e3c6b98896bf7204c4d029d4cbc4a7ae81b9 --- /dev/null +++ b/models/wal-script_ethiopic/vocab.txt @@ -0,0 +1,178 @@ +| +ን +ስ +ይ +ነ +አ +ተ +እ +ት +ሰ +ያ +ደ +ዮ +ቴ +ድ +ግ +ኔ +ር +መ +ዶ +ዴ +በ +ኤ +ው +ጋ +ጌ +ከ +ዎ +ኦ +ረ +የ +ሀ +ታ +ኮ +ፔ +ኡ +ለ +ና +ዬ +ዋ +ሄ +ጦ +ሳ +ቀ +ሽ +ገ +ባ +ም +ጊ +ሱ +ሸ +ሶ +ል +ቶ +ቱ +ኑ +ቤ +ኣ +ክ +ኬ +ሮ +ዳ +ማ +ላ +ጎ +ቦ +ወ +ሁ +ሜ +ሴ +ሬ +ቆ +ብ +ቅ +ሃ +ጠ +ዘ +ሎ +ጥ +ቲ +ራ +ቃ +ሌ +ሞ +ጉ +ፕ +ፈ +ኪ +ቸ +ፐ +ቢ +ህ +ሙ +ዪ +ሲ +ጣ +ጤ +ዱ +ቄ +ሻ +ቁ +ኩ +ች +ሼ +ጴ +ሚ +ፖ +ኢ +ሉ +ጭ +ዽ +ጵ +ካ +ፑ +ፍ +ዝ +ጨ +ሹ +ጰ +ኖ +ዜ +ጫ +ዛ +ሩ +ሺ +ሾ +ዉ +ጬ +ሊ +ፌ +ፋ +ዞ +ሆ +ዲ +ቹ +ጡ +ቻ +ዙ +ዩ +ሪ +ፓ +ቾ +ፉ +ቂ +ቡ +ዌ +ቺ +ጄ +ቼ +ጩ +ኒ +ሂ +ጮ +ጢ +ፊ +ጪ +ዚ +ጶ +ዸ +ዺ +ጅ +ፎ +ጁ +ጀ +ዼ +ጱ +- +ፒ +ጆ +ጳ +ጃ +ጂ +ዾ +ጲ +ኙ +ዻ +ዹ + diff --git a/models/wal-script_latin/G_100000.pth b/models/wal-script_latin/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..536b4e2f13bc061a74198a85774efbcea4809bb0 --- /dev/null +++ b/models/wal-script_latin/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93856494896bea52f01bb8c30578209f06d6c9763fd4772b5775dc8639d69e5d +size 145478479 diff --git a/models/wal-script_latin/config.json b/models/wal-script_latin/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wal-script_latin/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wal-script_latin/vocab.txt b/models/wal-script_latin/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bb2e8cb07c55705d8ceef4582ec60b06615ba6f3 --- /dev/null +++ b/models/wal-script_latin/vocab.txt @@ -0,0 +1,30 @@ +a +| +i +e +s +o +n +t +d +y +h +u +g +k +b +m +r +p +l +w +q +x +7 +c +z +f +' +j +- + diff --git a/models/wap/G_100000.pth b/models/wap/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..baa05dbe1384a98a9352d876ecc1fdc8d5ca3ff2 --- /dev/null +++ b/models/wap/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c182f0bb2b4376c305920bfc821ac5beea80d54b45f2ee7fdceb02f23f0bf9 +size 145488337 diff --git a/models/wap/config.json b/models/wap/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wap/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wap/vocab.txt b/models/wap/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..047da0a272176340a97376d1471b7bba5c707605 --- /dev/null +++ b/models/wap/vocab.txt @@ -0,0 +1,43 @@ +ã +z +p +m +3 +b +s +' +u +i +o +q +h +n +5 +õ +e +- +8 +a +y +0 +2 +l +7 +k +x +g +j +w + +r +c +t +f +d +9 +1 +4 +_ +6 +v +ĩ diff --git a/models/war/G_100000.pth b/models/war/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbef2b13424f8adea49476da35b9f830894bf0e2 --- /dev/null +++ b/models/war/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ffd4f285b2cf77c62cc4bc03b93721d93371a3df290e67bce18cf645c59bab4 +size 145487619 diff --git a/models/war/config.json b/models/war/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/war/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/war/vocab.txt b/models/war/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7be05258877a0e0594d1fd7d4719c56c1f161a57 --- /dev/null +++ b/models/war/vocab.txt @@ -0,0 +1,42 @@ +a +| +n +i +g +o +h +t +k +m +u +s +d +y +p +r +l +b +w +e +- +j +c +f +— +v +q +z +0 +4 +1 +2 +x +' +5 +3 +8 +9 +7 +ñ +6 + diff --git a/models/waw/G_100000.pth b/models/waw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e8e822b7799d5d558cb600e63b3aff690b01968e --- /dev/null +++ b/models/waw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e40b94c587acd7bd31525eaad55c6e37bcbbb69f1f8b43937cb4a6fba0155a +size 145481478 diff --git a/models/waw/config.json b/models/waw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/waw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/waw/vocab.txt b/models/waw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..be0eccd57d39797093338674efdd59556e3305c6 --- /dev/null +++ b/models/waw/vocab.txt @@ -0,0 +1,34 @@ +| +a +o +e +k +n +r +m +t +î +w +h +y +̂ +p +i +s +x +c +u +b +0 +1 +2 +7 +4 +3 +5 +- +6 +8 +9 +d + diff --git a/models/way/G_100000.pth b/models/way/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c978d2a5a8f735b0fea5721fe29cccbb9364daaa --- /dev/null +++ b/models/way/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cbcc74e79cbeb96dc8ca93b10a188d9ebcee0aead634b150d120857544a88da +size 145477633 diff --git a/models/way/config.json b/models/way/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/way/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/way/vocab.txt b/models/way/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..17b1773a06aba8fc8e1e640756270746f676e2b9 --- /dev/null +++ b/models/way/vocab.txt @@ -0,0 +1,29 @@ +| +a +ë +m +e +t +o +k +n +p +i +l +ï +h +u +w +j +s +0 +1 +2 +7 +4 +3 +5 +6 +9 +8 + diff --git a/models/wba/G_100000.pth b/models/wba/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d8956f58c43f1cdc6fcaf2c85bbfd7bd17a0221c --- /dev/null +++ b/models/wba/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4165181dfd74c669f93a2721096e5b63846ed961a6fc20acd5feb6edb413d7e9 +size 145483856 diff --git a/models/wba/config.json b/models/wba/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wba/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wba/vocab.txt b/models/wba/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..17374005e4c2ab423ddf4268d695ba3860265199 --- /dev/null +++ b/models/wba/vocab.txt @@ -0,0 +1,37 @@ +e +n +m +x +q +i +á +í +é +k +o +g +y +r +z +ó +d + +_ +s +h +c +v +w +l +b +ã +a +u +f +ñ +t +p +õ +- +j +ú diff --git a/models/wlo/G_100000.pth b/models/wlo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f1a52aaa9345882945ca50fef1bab23b5921efcb --- /dev/null +++ b/models/wlo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8add66066ac45b00c372cfc2aede91820c00fe25534f0f352f69b5db92d2bb22 +size 145476851 diff --git a/models/wlo/config.json b/models/wlo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wlo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wlo/vocab.txt b/models/wlo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0deb9da065fd8d65d395315018f2496a352d2913 --- /dev/null +++ b/models/wlo/vocab.txt @@ -0,0 +1,28 @@ +c +a +h +j +k +o + +f +i +' +d +p +ʼ +l +_ +r +w +z +s +e +m +t +g +n +u +- +b +y diff --git a/models/wlx/G_100000.pth b/models/wlx/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..407dd16469bfba476388bcf007b952107b56cd77 --- /dev/null +++ b/models/wlx/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd3f45b744e4a50aaf6b2f2d9059dbbf4136a4f2ac15ade72c331a8b648dc3c9 +size 145481445 diff --git a/models/wlx/config.json b/models/wlx/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wlx/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wlx/vocab.txt b/models/wlx/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5bab264e26567c4bef87d9d16fef829b90cb3d43 --- /dev/null +++ b/models/wlx/vocab.txt @@ -0,0 +1,34 @@ +o +9 +4 +w +h +k +m +q +5 +' +x +z +1 +_ +d +e +y +c +v +r +p +t +i +a +n +b +u +s + +j +g +f +- +l diff --git a/models/wmw/G_100000.pth b/models/wmw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5b8322a55c404518231f3bd52cff8ec6a91354f8 --- /dev/null +++ b/models/wmw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a437ed9cf0990f778fbe459c14e6f602621ce8ceb4be35a68ad916b0bae33b +size 145486825 diff --git a/models/wmw/config.json b/models/wmw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wmw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wmw/vocab.txt b/models/wmw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..bc448a84f5c39dcccf2d0a53a93adf195e436aee --- /dev/null +++ b/models/wmw/vocab.txt @@ -0,0 +1,41 @@ +a +| +i +u +n +w +k +e +m +o +y +r +s +l +t +b +p +d +z +g +j +f +v +h +‐ +c +á +' +- +í +é +â +ú +ã +ó +î +û +ê +ô +ũ + diff --git a/models/wob/G_100000.pth b/models/wob/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..871711c48e0cb2fb23a1a5a5f6de082fe0ac5fec --- /dev/null +++ b/models/wob/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fe6bb83d627938a5d5cb572666dcc4862b96ae4fc7c00522383c9d428a790d2 +size 145479155 diff --git a/models/wob/config.json b/models/wob/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wob/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wob/vocab.txt b/models/wob/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..62a8140527d5dbdaad7955b15dc3dd91cfa55dd4 --- /dev/null +++ b/models/wob/vocab.txt @@ -0,0 +1,31 @@ +| +' +a +‐ +e +n +ɛ +ʋ +i +ɔ +k +ɩ +o +d +s +m +w +j +u +t +p +b +l +y +r +c +g +f +- +v + diff --git a/models/wsg/G_100000.pth b/models/wsg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a061a7214d5bb719302c7e506b10930633739dfa --- /dev/null +++ b/models/wsg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1a1bfc6c88b8b99ba5789174c0ccc38801bdb4bc537e77ee1ca2974f98451e1 +size 145487709 diff --git a/models/wsg/config.json b/models/wsg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wsg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wsg/vocab.txt b/models/wsg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4aef659755273b8b17c3ba5fe839fbcd4c0f4722 --- /dev/null +++ b/models/wsg/vocab.txt @@ -0,0 +1,42 @@ +ల +చ +ద +ు +ా +ం +ప +ఆ +ఒ +ె +_ +k +అ + +ే +ఎ +య +ి +బ +ర +మ +వ +్ +క +ూ +ఈ +స +జ +ొ +ఊ +డ +ో +ీ +గ +హ +' +త +ఇ +న +- +ఉ +ట diff --git a/models/wwa/G_100000.pth b/models/wwa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5a36e345a70cf71522fc41a5660d2fe61a78ad50 --- /dev/null +++ b/models/wwa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cfcf48e6386f321659dd5316f50be22bf3f46b46f241cd648557576f127ad8 +size 145483011 diff --git a/models/wwa/config.json b/models/wwa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/wwa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/wwa/vocab.txt b/models/wwa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e2fe1439a2212de873b78e10b9137bdbcc68e05b --- /dev/null +++ b/models/wwa/vocab.txt @@ -0,0 +1,36 @@ +| +a +i +n +t +d +o +m +b +r +e +s +u +y +k +ɛ +ɔ +à +w +p +ò +ŋ +ǹ +- +ì +f +c +̃ +ã +̀ +' +` +ĩ +ũ +2 + diff --git a/models/xal/G_100000.pth b/models/xal/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..183de76dbec64b48bdfbd791024727856c9f0e82 --- /dev/null +++ b/models/xal/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:867788169e170a0ad17ac98df3845abaa4d4d37dcbcb82cec621cfd54d2e89f3 +size 145492985 diff --git a/models/xal/config.json b/models/xal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xal/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xal/vocab.txt b/models/xal/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..456063cd8741ef58913e70da1d2f5a68850b1c07 --- /dev/null +++ b/models/xal/vocab.txt @@ -0,0 +1,49 @@ +и +в +ю +ж +к +р +т +г + +ә +җ +д +_ +ч +п +х +ң +л +ө +ү +у +3 +ф +һ +о +б +ъ +0 +- +э +м +я +с +ё +а +е +й +з +1 +2 +– +6 +4 +н +ц +ѳ +ы +ш +ь diff --git a/models/xdy/G_100000.pth b/models/xdy/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..73720966b935ba2f07b99a2f7f49a3ec3f6a821e --- /dev/null +++ b/models/xdy/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0e469f86fe14f292cc738b73e619ed40374da5b33168487679b380bf5ea325b +size 145476085 diff --git a/models/xdy/config.json b/models/xdy/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xdy/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xdy/vocab.txt b/models/xdy/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e59612c1e93e40606f7f1c034170d8400fb09bc1 --- /dev/null +++ b/models/xdy/vocab.txt @@ -0,0 +1,27 @@ +m +b + +u +r +t +l +g +j +e +p +d +k +h +w +o +' +- +f +z +_ +i +a +c +s +n +y diff --git a/models/xed/G_100000.pth b/models/xed/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b81701b3d065dc51868313f8920cb64665fd52f5 --- /dev/null +++ b/models/xed/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a870dc88ff59d34b39f34fffcab134766713575dea1537825b751766cfb972bd +size 145478387 diff --git a/models/xed/config.json b/models/xed/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xed/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xed/vocab.txt b/models/xed/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5d8f74a64d9d6177d047c7e256d6e2d720e78c4d --- /dev/null +++ b/models/xed/vocab.txt @@ -0,0 +1,30 @@ +ŋ +h +b +k +ɗ +á +ɓ +w +t +ə +g +í +_ +n +m +f +a +d +ʼ +r +l +u +s +e +z + +p +v +i +y diff --git a/models/xer/G_100000.pth b/models/xer/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..3af7e8650444d2ee3ad6b3ca78fca9a44ecb2c7b --- /dev/null +++ b/models/xer/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fadeda07840f40ee4ef6889a93b75984261885ca2745e911f0962f9be5563c66 +size 145496693 diff --git a/models/xer/config.json b/models/xer/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xer/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xer/vocab.txt b/models/xer/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..590c3cdfeb48c820bbd0db2fc93ed2a0f214b49d --- /dev/null +++ b/models/xer/vocab.txt @@ -0,0 +1,54 @@ +p +e +a +x +i +k +d +ó +2 +t +ô +ĩ +g +1 +c +ê +j +â +h +8 +l + +õ +n +ũ +é +ú +z +r +5 +w +v +u +í +0 +7 +9 +á +ẽ +ã +o +f +s +û +ø +b +- +3 +6 +q +_ +' +— +m diff --git a/models/xmm/G_100000.pth b/models/xmm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5338440eeb7a189c6c8c20442e187dc47c4b787c --- /dev/null +++ b/models/xmm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8755c07f6c9cbd933cf4368cb749c0a8d011ee8680e16799af3d567c679114d9 +size 145480715 diff --git a/models/xmm/config.json b/models/xmm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xmm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xmm/vocab.txt b/models/xmm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f92e5cf195116803fd002a5c6efaf1582b36ba6b --- /dev/null +++ b/models/xmm/vocab.txt @@ -0,0 +1,33 @@ +2 +4 +' +r +c +l +s +0 +z +a +j +m +y +o +d +n +i +f +b +e +h +t +6 +g +3 +u +_ +p +k +1 + +w +- diff --git a/models/xnj/G_100000.pth b/models/xnj/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5c3a26a62b54bcd9cc7fcf96b95636b5040251e3 --- /dev/null +++ b/models/xnj/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a84393ca47d324455e3d40c4efe1d3103ec58c4fa849313e52250b8db17e77b +size 145475293 diff --git a/models/xnj/config.json b/models/xnj/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xnj/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xnj/vocab.txt b/models/xnj/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a21f1b9d58f54dbe6adb62e5f3153aea2c2a6cb7 --- /dev/null +++ b/models/xnj/vocab.txt @@ -0,0 +1,26 @@ +n +_ +f +p +o +w +k +v +y +a +i +j +h +e +c +z +d +u +t +' +l +b +s +m +g + diff --git a/models/xnr/G_100000.pth b/models/xnr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d63a078b71bb1111ee3018267bbcd053dce2ec37 --- /dev/null +++ b/models/xnr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53552267f237f74b488a266339b43a99bc454ff34741823e3752993bd1dfaceb +size 145502199 diff --git a/models/xnr/config.json b/models/xnr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xnr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xnr/vocab.txt b/models/xnr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d252a9264d5ffe068c1973eb6d0c10ee765d7efd --- /dev/null +++ b/models/xnr/vocab.txt @@ -0,0 +1,61 @@ +| +ा +े +् +ं +त +र +क +ह +ि +स +द +न +ी +ज +म +ल +य +प +ो +ु +ै +़ +ग +ण +ब +च +अ +आ +ई +ड +ऐ +ू +इ +ख +भ +थ +व +ए +श +ौ +ध +छ +फ +ठ +ट +झ +- +घ +ष +ऊ +उ +ओ +ढ +ँ +औ +ञ +ृ +' +— + diff --git a/models/xog/G_100000.pth b/models/xog/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a0d108f48f490b826ede6202b44db1ff5275bdb2 --- /dev/null +++ b/models/xog/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48c8130889b6ee017875735a9cd1b1afd24655cad3d4e21e3d66f2bd2c68581 +size 145475311 diff --git a/models/xog/config.json b/models/xog/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xog/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xog/vocab.txt b/models/xog/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..08a77d56d5793cf11bbffb483e66249718f488cb --- /dev/null +++ b/models/xog/vocab.txt @@ -0,0 +1,26 @@ +y +a +m +s +f +k +h +o +_ +w +l +b +d +n +' +p +v +i +u +e +t +z +g + +j +r diff --git a/models/xon/G_100000.pth b/models/xon/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8f6dd286b89dd367fd1255f69eb3a09bb7f8562 --- /dev/null +++ b/models/xon/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5442604d97275a260d903d3a015c0d19bb7f19d521980ae9f35c663595767ad1 +size 145477621 diff --git a/models/xon/config.json b/models/xon/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xon/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xon/vocab.txt b/models/xon/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..5a72f97fda13cb64a3828372c7deda532891f2f3 --- /dev/null +++ b/models/xon/vocab.txt @@ -0,0 +1,29 @@ +| +a +i +n +u +b +k +e +m +l +t +ɔ +p +o +r +y +w +s +h +g +d +ŋ +j +c +f +- +' +v + diff --git a/models/xrb/G_100000.pth b/models/xrb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e2d5afc9019184615fa5afbb125071687c104f47 --- /dev/null +++ b/models/xrb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75e8f9fb0363bffa47a4eec391807d93b196b41cb5d10430ce40a31f2a7ea38b +size 145481483 diff --git a/models/xrb/config.json b/models/xrb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xrb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xrb/vocab.txt b/models/xrb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..13dcc93afa12b73ea82541d88b9efdfb3624c78d --- /dev/null +++ b/models/xrb/vocab.txt @@ -0,0 +1,34 @@ +| +a +e +y +n +ã +i +o +w +k +u +p +m +h +l +s +t +r +g +ĩ +b +ɛ +ẽ +f +d +ŋ +- +c +ũ +ɔ +̃ +' +j + diff --git a/models/xsb/G_100000.pth b/models/xsb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..40fa25bd3587e54fde9813e7f2d831e4c8b47ec4 --- /dev/null +++ b/models/xsb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66fc6bd553cbb6a94f90c9049db4d498e8d6d6026106605f4b0a1ea3ece8678c +size 145487620 diff --git a/models/xsb/config.json b/models/xsb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xsb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xsb/vocab.txt b/models/xsb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d09eae7d6b83dc2045ad9188d159e01e967569f4 --- /dev/null +++ b/models/xsb/vocab.txt @@ -0,0 +1,42 @@ +f +t + +2 +0 +à +ô +c +p +z +b +u +ñ +g +' +y +s +4 +î +e +á +1 +w +q +_ +6 +x +d +r +h +j +k +m +n +a +i +o +ó +- +l +v +â diff --git a/models/xsm/G_100000.pth b/models/xsm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb0bda893be20620f151f0ca49d8fb5262ebf21b --- /dev/null +++ b/models/xsm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e369c96828f45b858d67b39a88aab922e7c036c20a818eba2143dfb580e788e1 +size 145482221 diff --git a/models/xsm/config.json b/models/xsm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xsm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xsm/vocab.txt b/models/xsm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3285adcbc5cc11405920913701a4abf1a88047e9 --- /dev/null +++ b/models/xsm/vocab.txt @@ -0,0 +1,35 @@ +| +a +e +o +n +m +t +i +w +b +d +k +y +ɔ +r +l +g +s +ŋ +u +ɛ +- +p +á +j +z +h +c +é +ó +v +f +' +ǝ + diff --git a/models/xsr/G_100000.pth b/models/xsr/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..34bdd7271a94f33568ae8c67676d7f48826edf59 --- /dev/null +++ b/models/xsr/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f5579e783ed717e45c6155d28cd1fbd72076e1652fe2275f459fc2ae458ae4c +size 145499097 diff --git a/models/xsr/config.json b/models/xsr/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xsr/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xsr/vocab.txt b/models/xsr/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c5a605957951df68361e27af3280ca9fa71fddb4 --- /dev/null +++ b/models/xsr/vocab.txt @@ -0,0 +1,57 @@ +ठ +द +फ +ँ +ी +् +ओ +प +न +त +ट +श +ख +इ +ो +स +झ +ङ +ष + +ल +' +६ +भ +ौ +_ +ऊ +थ +उ +ड +क +ह +घ +ज +ै +ए +ू +छ +- +ि +व +ु +आ +ई +ा +ध +अ +ण +य +च +ग +ब +म +र +‍ +ं +े diff --git a/models/xsu/G_100000.pth b/models/xsu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b31d1375f5c17fbd64ccda6b26a8557fe0e341dc --- /dev/null +++ b/models/xsu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4453e1a1eba41d602c0449a606c184b232e1e79ecdc51cc0a41eebcb0de14f +size 145494535 diff --git a/models/xsu/config.json b/models/xsu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xsu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xsu/vocab.txt b/models/xsu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4b8a889d20557cef9535c64978dd54cb90f4c3e5 --- /dev/null +++ b/models/xsu/vocab.txt @@ -0,0 +1,51 @@ +| +a +ö +i +t +p +k +o +n +s +u +m +l +ĩ +e +h +ä +w +' +j +õ +ã +r +̃ +c +é +ẽ +d +0 +ó +b +á +v +ô +1 +í +7 +4 +2 +3 +ê +5 +f +ũ +g +ú +6 +8 +â +9 + diff --git a/models/xta/G_100000.pth b/models/xta/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..a1647d26fb4e06b90d08bbc26b9bd4c9e67915ab --- /dev/null +++ b/models/xta/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a640b0b8050339d567b1836797eb455941b18e53552215a9fe199363de11f6d +size 145487705 diff --git a/models/xta/config.json b/models/xta/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xta/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xta/vocab.txt b/models/xta/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f4599c0d0142a0ae7ff29c4a39fc7913170ec128 --- /dev/null +++ b/models/xta/vocab.txt @@ -0,0 +1,42 @@ +s +d +m +u +_ +g +ó +ä +l +é +k +z +á +n +x +h +p +f +o +y +i +ú +c +q +r +ö +̱ +ǔ +e +v +ü +ꞌ +í +j +t +ǎ +b +ë +ñ +ï + +a diff --git a/models/xtd/G_100000.pth b/models/xtd/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ebbe6e8a1f12ad092bd461a727b8b88c86138710 --- /dev/null +++ b/models/xtd/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ec74958a4cc33605327955acd65823bcba54d22c6f0b04058aeb2f52d3e15c8 +size 145485314 diff --git a/models/xtd/config.json b/models/xtd/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xtd/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xtd/vocab.txt b/models/xtd/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c60843d140225735c3286ae22ecb9e73a71822e2 --- /dev/null +++ b/models/xtd/vocab.txt @@ -0,0 +1,39 @@ + +ʼ +i +k +x +ɨ +ñ +h +c +n +ó +— +d +u +í +' +m +‐ +z +v +ú +e +g +j +y +f +q +é +o +b +á +́ +t +p +a +r +l +s +_ diff --git a/models/xte/G_100000.pth b/models/xte/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..737e1c453967a244c8a840737391d39326f57ef3 --- /dev/null +++ b/models/xte/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7753829e958f6daf482e8df729855e6c27a17e9f225403b7d111db0e7d59fa92 +size 145483008 diff --git a/models/xte/config.json b/models/xte/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xte/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xte/vocab.txt b/models/xte/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..838f3a49cc24b61283eebc30c881e7b7feecb5a6 --- /dev/null +++ b/models/xte/vocab.txt @@ -0,0 +1,36 @@ +h +4 +w + +r +0 +m +j +z +9 +l +p +5 +y +2 +a +b +7 +s +- +d +1 +u +f +o +i +e +t +c +_ +k +8 +g +n +6 +3 diff --git a/models/xtm/G_100000.pth b/models/xtm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d62eaeb7c2bdc2f6871aed0908e3e6d660a43eab --- /dev/null +++ b/models/xtm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a4477e9cab7a4200e2d3a6dca884282c2006a2fb72e70d3f9d450d53b3aff1 +size 145486853 diff --git a/models/xtm/config.json b/models/xtm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xtm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xtm/vocab.txt b/models/xtm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ba8e584961943e0ae9cbf16300a13e537035522e --- /dev/null +++ b/models/xtm/vocab.txt @@ -0,0 +1,41 @@ +â +i +m +e +r +‍ +v +j +á +d +t +û +f +' +‐ +k +u +y +ú +é +o +l +í +h +ó +n +x +î +z +c +_ +ñ + +q +b +p +w +a +g +ê +s diff --git a/models/xtn/G_100000.pth b/models/xtn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f711dda9ca3076f5b2f70082bc37a86a5018387c --- /dev/null +++ b/models/xtn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6f7d24aa1ab2a5c454ed50ea8c7a068fdf62f662f009277372326d324b0f44 +size 145482213 diff --git a/models/xtn/config.json b/models/xtn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xtn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xtn/vocab.txt b/models/xtn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ac532c8652b395369a3081988e531b4ea77ccc6e --- /dev/null +++ b/models/xtn/vocab.txt @@ -0,0 +1,35 @@ +i +͏ +n +u +p +t +k +á +l +o +r +h +q +x + +v +e +m +s +j +ꞌ +ñ +_ +g +í +c +y +z +ó +a +d +b +f +ú +é diff --git a/models/xua/G_100000.pth b/models/xua/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1a8f3caac1d2d2be6a902ff374df2cc6ae73e0ac --- /dev/null +++ b/models/xua/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4662e479020d6182e0f6f85b56761ffa902380ad3ea7092095a73fcf1a6b1359 +size 145490773 diff --git a/models/xua/config.json b/models/xua/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xua/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xua/vocab.txt b/models/xua/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc30ddaa4edf631cac04a82ebfc8bf18b3c2f670 --- /dev/null +++ b/models/xua/vocab.txt @@ -0,0 +1,46 @@ +ெ +ள +ூ +ஆ +ல +ஈ +இ +ற +ு +ச +ீ +ஓ +ை +ஹ + +அ +ம +ண +ா +உ +ஊ +ந +ே +- +_ +ஒ +த +ி +ழ +ஐ +் +ஞ +ன +ஸ +ப +ோ +வ +ட +ங +ஜ +ஏ +ொ +க +எ +ர +ய diff --git a/models/xuo/G_100000.pth b/models/xuo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1d512e7e93067652e14890e74924b3559aff300a --- /dev/null +++ b/models/xuo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179ad687a447494312c932c83cb65c661a768451dc3c32e88736e865da48d0ac +size 145494508 diff --git a/models/xuo/config.json b/models/xuo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/xuo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/xuo/vocab.txt b/models/xuo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..12566b2fe1c4668d176aac1609aa510214be7a38 --- /dev/null +++ b/models/xuo/vocab.txt @@ -0,0 +1,51 @@ +ě +_ +ḭ +é +̰ +k +ɓ +v +f +p +- +ù +e +ɗ +y +ú +à +n +ì +m +z +́ +b +d +ɔ +i +ǔ +ǐ +s +w + +ɛ +l +̌ +ǒ +h +o +t +í +á +r +' +ŋ +ṵ +u +ʼ +ó +g +ǎ +̀ +a diff --git a/models/yaa/G_100000.pth b/models/yaa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..35fdc920680df4e7d1a4e8c7607d3f8a84e38f57 --- /dev/null +++ b/models/yaa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0f3528f87a46c7f5af6e882e14fc50e8cd8f86065491e62afc7a23e152afeda +size 145483859 diff --git a/models/yaa/config.json b/models/yaa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yaa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yaa/vocab.txt b/models/yaa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c786fcd842b80eb6b3fa3f0403b9f9e4e5481341 --- /dev/null +++ b/models/yaa/vocab.txt @@ -0,0 +1,37 @@ +a +| +i +o +k +n +f +s +e +m +ã +t +r +õ +x +p +h +y +ĩ +ẽ +c +j +ú +u +l +d +b +é +í +á +g +v +z +ó +q +ñ + diff --git a/models/yad/G_100000.pth b/models/yad/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..f4eacab24f7337277c52e39aa50c1d8ac1bf88f3 --- /dev/null +++ b/models/yad/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c891e89a24d6554e39e0ea91d38cb98cd345860b5fe686243f1efb717868ec +size 145478369 diff --git a/models/yad/config.json b/models/yad/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yad/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yad/vocab.txt b/models/yad/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..407296738da38a6c8d24fc8bf364d2ed4d2d5dbb --- /dev/null +++ b/models/yad/vocab.txt @@ -0,0 +1,30 @@ +a +| +i +y +̱ +u +r +j +t +n +d +s +v +e +m +c +h +o +ñ +b +p +q +ú +á +í +é +ó +l +g + diff --git a/models/yal/G_100000.pth b/models/yal/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e869ae313a8d4312338d5b4d44a4e0fd19b1fa37 --- /dev/null +++ b/models/yal/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068ad6a48b9f865bda5e2ce4a4ecec19369530e93cc2974c34795c804fb8cf9f +size 145477739 diff --git a/models/yal/config.json b/models/yal/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yal/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yal/vocab.txt b/models/yal/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..476cdb9edd88fdea1825da012ce18b89f1c51ac9 --- /dev/null +++ b/models/yal/vocab.txt @@ -0,0 +1,29 @@ +| +a +n +i +e +x +y +m +ɛ +u +l +t +r +s +f +b +o +ɔ +d +k +g +ɲ +w +h +p +- +1 +c + diff --git a/models/yam/G_100000.pth b/models/yam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ba920ea77f49af6ac2ae5ba1556c2b96e5a4808a --- /dev/null +++ b/models/yam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:776af7001998f5c45c08d835fcf8c71606563ec5e8b2e0edf2379cf597dc2559 +size 145486805 diff --git a/models/yam/config.json b/models/yam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yam/vocab.txt b/models/yam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f56e08604838f14278b8380d8015dd08d59b1906 --- /dev/null +++ b/models/yam/vocab.txt @@ -0,0 +1,41 @@ +| +ə +n +s +̀ +a +ŋ +w +m +y +à +k +e +o +' +ʉ +b +i +g +è +d +t +ò +f +u +v +p +l +ì +c +ɛ +z +r +h +j +ù +ʼ +- +ˋ +̍ + diff --git a/models/yao/G_100000.pth b/models/yao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5852360b8dbed628a95e2bcce412fc33da193375 --- /dev/null +++ b/models/yao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99508b8f8232d536a68a9d6f6ecdcf4ca37c3afd7d4f66beae89f6ff61c4cd51 +size 145476869 diff --git a/models/yao/config.json b/models/yao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yao/vocab.txt b/models/yao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..aa58fad3e269a6dc33a54d7232ed4d42e92b994e --- /dev/null +++ b/models/yao/vocab.txt @@ -0,0 +1,28 @@ +g +_ +b +c +n +s +ŵ +j + +v +m +k +r +z +' +d +a +e +t +u +y +f +l +i +w +p +h +o diff --git a/models/yas/G_100000.pth b/models/yas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..79b3ee48084abf728195924e7542f01e86ef9f9e --- /dev/null +++ b/models/yas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7feec51f5e80bb31f67969935a6fcb09c9a9f0004ab8f460ab55516861e17cd8 +size 145479137 diff --git a/models/yas/config.json b/models/yas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yas/vocab.txt b/models/yas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9d5311083a18d8d522d0ae7b60e5104bbf5ee1ef --- /dev/null +++ b/models/yas/vocab.txt @@ -0,0 +1,31 @@ +ɛ +́ +b +i +p +l +t +h +m +é + +g +k +d +í +_ +ú +o +c +ŋ +u +e +– +ó +f +a +s +y +á +ɔ +n diff --git a/models/yat/G_100000.pth b/models/yat/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..71b3fb9866e10d33c7b519f0a0b65626eb0b07ec --- /dev/null +++ b/models/yat/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143aeec7ff34b7929be39b3451d00c94a3e8a4595df8cfed7d71ba96ef6d43c2 +size 145482199 diff --git a/models/yat/config.json b/models/yat/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yat/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yat/vocab.txt b/models/yat/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f1080b847a94ff6bd929fe9fad018698e9f01979 --- /dev/null +++ b/models/yat/vocab.txt @@ -0,0 +1,35 @@ +y +s +a +i +n +b +t +r +́ +l +_ +g +ɛ +d +ú +k +m +e +è +ɔ +c +á +u +f +ŋ +í +o + +p +ó +w +j +ʼ +ə +é diff --git a/models/yaz/G_100000.pth b/models/yaz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9bcaa32888e76a7ab867409e42903308a1ec0e67 --- /dev/null +++ b/models/yaz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f56258e12cb04ca9088a57623d4187099864ee6325105258359b9289388b695 +size 145482229 diff --git a/models/yaz/config.json b/models/yaz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yaz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yaz/vocab.txt b/models/yaz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..448d06c102e2b21fb1ca98e663315f8d49e2b842 --- /dev/null +++ b/models/yaz/vocab.txt @@ -0,0 +1,35 @@ +h +ẹ +w +n +d +v +_ + +t +g +ọ +a +y +̀ +j +ì +l +à +z +i +- +ạ +' +ò +s +o +k +e +r +c +p +f +u +b +m diff --git a/models/yba/G_100000.pth b/models/yba/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..acef5f0ea61c525c344a411d640649c559a0083c --- /dev/null +++ b/models/yba/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:760b8a4d76f94c147550b12e6ab1d84e0a5432380534e818367086320b9fce96 +size 145483091 diff --git a/models/yba/config.json b/models/yba/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yba/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yba/vocab.txt b/models/yba/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a7306c19a499f7efec511583007e930057b0bd2f --- /dev/null +++ b/models/yba/vocab.txt @@ -0,0 +1,36 @@ + +ɛ +s +̄ +n +f +d +u +w +p +ā +m +b +_ +o +' +ɔ +r +j +l +a +ō +h +k +i +e +c +ū +ī +g +t +6 +- +y +ē +̍ diff --git a/models/ybb/G_100000.pth b/models/ybb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..2f1450c9b68c0ae68777b9bff01465dafa6ab2c0 --- /dev/null +++ b/models/ybb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf99b744a9d13fa08c6da794fd34867959deda1804e474934250913682efe12 +size 145490676 diff --git a/models/ybb/config.json b/models/ybb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ybb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ybb/vocab.txt b/models/ybb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b261baa7202edfad6da4cf0ca18f7f1693b0d17a --- /dev/null +++ b/models/ybb/vocab.txt @@ -0,0 +1,46 @@ +| +e +ɔ +ŋ +ɛ +m +á +n +́ +a +t +p +é +ʼ +s +l +g +i +k +h +ʉ +w +z +̄ +y +í +d +u +ī +b +ú +ū +f +ā +j +o +ń +ē +ó +c +ō +ḿ +‐ +v +- + diff --git a/models/ycl/G_100000.pth b/models/ycl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..794ce857f5dec65f6188baa612404de8185dbeb1 --- /dev/null +++ b/models/ycl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ec0ae6869d74fad65f549aaff9076d73efcd3c146cb0fd4f1da54dec638d5f6 +size 145484531 diff --git a/models/ycl/config.json b/models/ycl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ycl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ycl/vocab.txt b/models/ycl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fec845e8fb45a3f6bfc6100d13de3e2e4a10390d --- /dev/null +++ b/models/ycl/vocab.txt @@ -0,0 +1,38 @@ +6 +h +0 +d +c +f +' +2 +v +i +o + +b +s +x +a +r +m +e +n +z +l +p +k +1 +- +3 +j +5 +g +t +y +q +_ +w +4 +9 +u diff --git a/models/ycn/G_100000.pth b/models/ycn/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b6980d7536a49fe7797c58162519c0e8a5601e0a --- /dev/null +++ b/models/ycn/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc34defbe6f8f23f075e19f32fbde2ec83fcbf8eeaab0865fbfc64e0cd56862 +size 145486839 diff --git a/models/ycn/config.json b/models/ycn/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ycn/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ycn/vocab.txt b/models/ycn/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ec384c21e90f094c65d5d450550a5a2869910be --- /dev/null +++ b/models/ycn/vocab.txt @@ -0,0 +1,41 @@ +a +| +k +i +e +j +n +' +á +r +u +p +l +o +m +w +t +h +é +c +y +ú +ñ +ó +í +s +d +— +b +f +g +z +v +q +x +0 +- +4 +6 +1 + diff --git a/models/yea/G_100000.pth b/models/yea/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bf6f54ff83339324963792e394a88794cfb52ec0 --- /dev/null +++ b/models/yea/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e9c5563bf99a830a79dafff1c28dc3d2c3eec3a86a1d84857a323e4ecd491a5 +size 145491470 diff --git a/models/yea/config.json b/models/yea/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yea/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yea/vocab.txt b/models/yea/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3c29a10def9b097e5346575e7045c204c29ac230 --- /dev/null +++ b/models/yea/vocab.txt @@ -0,0 +1,47 @@ +ഓ +ാ +മ +ഞ +യ +ണ +അ +വ +ല +ഊ +ഉ +് +ക +േ +ഇ +െ +ട +ആ +ത +ര +ജ +ൊ +ഒ +ന +ഗ +ൂ +ദ +ബ +ു +ഈ +പ +ങ +‍ +ോ +ഏ +ീ +' +ച +ി +എ +— +ള +ഡ + +റ +ൺ +_ diff --git a/models/yka/G_100000.pth b/models/yka/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2b0c33afcbb6e9276023dc4ead5b2d95e9f75b4 --- /dev/null +++ b/models/yka/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4304e1ef7d1e24abd231325f9f0fda6282545e9ec52cbee266386b78c312ed16 +size 145479930 diff --git a/models/yka/config.json b/models/yka/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yka/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yka/vocab.txt b/models/yka/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..7da36d826d8309bdc24a523461ad454adc21464b --- /dev/null +++ b/models/yka/vocab.txt @@ -0,0 +1,32 @@ +ō +c +n +w +i +s +t +ꞌ +h +k +j +ī +ū +r + +- +a +p +ē +ã +' +u +o +e +y +l +m +g +d +b +_ +ā diff --git a/models/yli/G_100000.pth b/models/yli/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..53602986d35bfbbd81ba4ead6abc8d7b34567dda --- /dev/null +++ b/models/yli/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:363a092f38d397af92411126daaad3e7801f72abe66174e75809b1cdaef1c171 +size 145476094 diff --git a/models/yli/config.json b/models/yli/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yli/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yli/vocab.txt b/models/yli/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..70e02360ba773621436c670b4cf9de617910ec65 --- /dev/null +++ b/models/yli/vocab.txt @@ -0,0 +1,27 @@ +| +a +n +e +i +u +o +h +g +l +m +t +r +k +s +w +p +b +f +y +d +- +' +z +j +c + diff --git a/models/yor/G_100000.pth b/models/yor/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..28823a6495f5b536700176d0d95d973f6ffc104c --- /dev/null +++ b/models/yor/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af5caab6c8c4ea460b5fb9082bbf1adeef035b72c918870cd0958bf65b8eb58 +size 145488385 diff --git a/models/yor/config.json b/models/yor/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yor/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yor/vocab.txt b/models/yor/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..b3960d6d89df52efdac53b186d1f6378452a52d8 --- /dev/null +++ b/models/yor/vocab.txt @@ -0,0 +1,43 @@ +| +n +í +ọ +i +à +ẹ +t +a +r +b +ì +w +́ +l +̀ +k +á +s +ú +o +g +m +y +ó +p +u +e +j +ò +f +ṣ +é +d +è +ù +ń +h +- +ǹ +— +' + diff --git a/models/yre/G_100000.pth b/models/yre/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..62f2a018612d9bf1c4a3aebba31734d705638a52 --- /dev/null +++ b/models/yre/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b7af5abde126084ef299611cba0e69818b42e76f90aee656643bbc1cce0e33 +size 145479919 diff --git a/models/yre/config.json b/models/yre/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yre/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yre/vocab.txt b/models/yre/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cde515cdd2ea3bae45c280ebe850c27fcb4d0c07 --- /dev/null +++ b/models/yre/vocab.txt @@ -0,0 +1,32 @@ +s +_ +y +j +z +n +ɛ +f +a +c +r +v +' + +m +d +ɔ +t +h +p +o +ɩ +- +e +g +u +k +ʋ +l +w +i +b diff --git a/models/yua/G_100000.pth b/models/yua/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..8d3924487101b4679109e6b92686da37f8da1923 --- /dev/null +++ b/models/yua/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9fcdc7aa79306dabf3df4cb43fa9426a9e4e5975fb65817bf8975364a6ede11 +size 145482375 diff --git a/models/yua/config.json b/models/yua/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yua/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yua/vocab.txt b/models/yua/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..48e000172125dc558f1bce7b462f57e5ef52e929 --- /dev/null +++ b/models/yua/vocab.txt @@ -0,0 +1,35 @@ +| +a +' +e +l +u +o +t +i +c +n +b +j +m +y +x +á +k +s +h +é +p +z +q +d +w +ú +ó +í +r +g +f +v +ñ + diff --git a/models/yuz/G_100000.pth b/models/yuz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..28681ebc97c4d189c4a46e2f6226de83f6f136e4 --- /dev/null +++ b/models/yuz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93204acd42c756d24cfa7c0c64572ecda96a00bd2345c465cfe4870e0f589e4d +size 145493081 diff --git a/models/yuz/config.json b/models/yuz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yuz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yuz/vocab.txt b/models/yuz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..245da674032fbf52c20671c3a2245383cd3658c6 --- /dev/null +++ b/models/yuz/vocab.txt @@ -0,0 +1,49 @@ +á +ä +í +l +u +' +h +2 +t +r +6 +i +e +ó +é +9 +‐ +b +n +a +ë +k +j +ñ +v +z +- +s +y +c +d +3 +o +x +q +f +1 +5 + +m +0 +_ +8 +4 +ü +ú +g +p +7 diff --git a/models/yva/G_100000.pth b/models/yva/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..652f53bbe96e2c0e7ef5ddfe13d225f9ea92d5f3 --- /dev/null +++ b/models/yva/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef888e0f52d649f992a33763d129445c492beb9fa5e4db09dcdf0e3ec498609 +size 145476859 diff --git a/models/yva/config.json b/models/yva/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/yva/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/yva/vocab.txt b/models/yva/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..31e463b5457d763e408cfbc39443e34b855f474c --- /dev/null +++ b/models/yva/vocab.txt @@ -0,0 +1,28 @@ +a +| +n +o +e +i +m +r +u +t +s +y +w +v +p +k +b +j +d +g +l +h +' +z +- +f +c + diff --git a/models/zaa/G_100000.pth b/models/zaa/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e01f2282083f69c3709c2717edd70572c4c09f50 --- /dev/null +++ b/models/zaa/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ad6b328bd53aba06da1a0a39aec604238498fbd3ee96ffde13f405f3e31d94 +size 145492217 diff --git a/models/zaa/config.json b/models/zaa/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zaa/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zaa/vocab.txt b/models/zaa/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9ac8cd8f87dd4753f6a3df4bb1526a333ee8af0b --- /dev/null +++ b/models/zaa/vocab.txt @@ -0,0 +1,48 @@ +| +a +n +i +u +' +e +t +l +c +r +s +b +o +d +q +p +á +ą +h +g +í +y +à +m +é +ì +è +ỹ +į +ú +ù +j +x +f +ó +v +́ +z +ñ +ò +̀ +k +ü +̨ +ı +ẗ + diff --git a/models/zab/G_100000.pth b/models/zab/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e6391061c2e0c63d5926802fdfbeb314ec7b70a --- /dev/null +++ b/models/zab/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7a60d80b9e5920c10c96d013a1c1d74ce9cdce5cf713b5cb9124c617d97576 +size 145483783 diff --git a/models/zab/config.json b/models/zab/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zab/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zab/vocab.txt b/models/zab/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..efcd938069197b9deb87edac2cc0b67c9d65b0c7 --- /dev/null +++ b/models/zab/vocab.txt @@ -0,0 +1,37 @@ +u +f +d +r +e +x +ó +v +j +ü +k +t +— +q +ú +l +z +á +m +n +_ + +é +ɨ +h +i +g +́ +s +y +b +ñ +í +o +p +a +c diff --git a/models/zac/G_100000.pth b/models/zac/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..fb9810bbd4c1482c77e013e88b88c1745b2084ac --- /dev/null +++ b/models/zac/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a6404d86365d882d1791230252fd818563c2f287cda61986b1c3b0d4fa4dbfe +size 145483021 diff --git a/models/zac/config.json b/models/zac/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zac/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zac/vocab.txt b/models/zac/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dd5b4814dffb4bb44f67ad5c8dfcdb73276b9b17 --- /dev/null +++ b/models/zac/vocab.txt @@ -0,0 +1,36 @@ +á +f +_ +́ +q +u +ú +í +k +c +ü +j +e +y +a +s +p +i +o +d +é +z +v +ñ +b +ʼ +m +t +l + +r +h +g +ó +n +ë diff --git a/models/zad/G_100000.pth b/models/zad/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0a99a6b7df6717d3c98ee5029cfd477eaf485786 --- /dev/null +++ b/models/zad/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f600e5181fc58c3f93a6b1dfd455e1b338b1bca90f1612c50bac14431276985 +size 145486180 diff --git a/models/zad/config.json b/models/zad/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zad/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zad/vocab.txt b/models/zad/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..b126e914c7103e02b58bd8f4c3ab9456ae3c58df --- /dev/null +++ b/models/zad/vocab.txt @@ -0,0 +1,40 @@ +| +a +ꞌ +e +n +i +l +o +h +k +c +b +g +d +z +s +y +w +ṉ +t +ḻ +̱ +j +x +r +p +m +í +u +ə +ú +- +á +é +ó +f +v +q +ñ + diff --git a/models/zae/G_100000.pth b/models/zae/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e973b4e6213e3f5bf596444c69d40e3cd9ccc26f --- /dev/null +++ b/models/zae/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ac6977f3b1618bb6ccc64c453585c6d2df01e365c44f2659f03d22dca6cc97 +size 145483753 diff --git a/models/zae/config.json b/models/zae/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zae/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zae/vocab.txt b/models/zae/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..d9240b6e2993576af49871bf217c697c4de194c2 --- /dev/null +++ b/models/zae/vocab.txt @@ -0,0 +1,37 @@ +| +a +n +e +i +t +u +l +b +k +h +r +s +g +d +o +' +x +y +á +í +c +‐ +w +ú +m +p +j +é +f +ó +v +z +q +ñ +- + diff --git a/models/zai/G_100000.pth b/models/zai/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9fc556d5f9bacc60461bce49a1589d7fd3b48790 --- /dev/null +++ b/models/zai/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26014748e4a1d9f3162b823f6ce3ad658c2ba36b258a5311c029fd31d4651810 +size 145483767 diff --git a/models/zai/config.json b/models/zai/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zai/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zai/vocab.txt b/models/zai/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1c8f61e4af99d276ffbf186f00b994fbb3c595cb --- /dev/null +++ b/models/zai/vocab.txt @@ -0,0 +1,37 @@ +| +a +i +u +n +e +c +d +r +b +l +t +s +x +g +' +o +p +h +q +z +á +é +y +m +í +ñ +j +ú +ó +— +f +v +ü +k +- + diff --git a/models/zam/G_100000.pth b/models/zam/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..42e4e462632840e884f378b19c5fce3b7d9ff74b --- /dev/null +++ b/models/zam/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15d6c621c7960a63680534951c1700f5d7d139b42925275dbb0fc233884017c4 +size 145486829 diff --git a/models/zam/config.json b/models/zam/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zam/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zam/vocab.txt b/models/zam/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..1d4e4975f62e5ebc954db455dddf072427d0b73b --- /dev/null +++ b/models/zam/vocab.txt @@ -0,0 +1,41 @@ +| +n +o +e +ꞌ +a +l +c +i +d +u +t +m +s +x +g +b +r +y +h +è +p +z +' +q +à +ó +j +ú +ì +ò +á +ñ +é +f +ù +í +v +ü +k + diff --git a/models/zao/G_100000.pth b/models/zao/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..48f72bf5a7a286dedcb6d1d863d8a4346b6f347a --- /dev/null +++ b/models/zao/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f27dd459f3bef091777331d42308648e1ae1dd98a1baab4da02dbc54e700fa1 +size 145482252 diff --git a/models/zao/config.json b/models/zao/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zao/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zao/vocab.txt b/models/zao/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..57910a8f87717919c0556fdd81d3192663edc48e --- /dev/null +++ b/models/zao/vocab.txt @@ -0,0 +1,35 @@ +o +t +e +j +_ +b +á +x +k +g +y +q +i +l +m +v + +u +s +n +w +a +é +ú +z +r +í +f +ʼ +p +ó +d +h +c +- diff --git a/models/zaq/G_100000.pth b/models/zaq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..92687daa464e24371d4375ae5e4a134825f39451 --- /dev/null +++ b/models/zaq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eda2eff80255792888c64944cb3f452ba0f3b2751839227875209be027cf9942 +size 145491427 diff --git a/models/zaq/config.json b/models/zaq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zaq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zaq/vocab.txt b/models/zaq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..c84c1aa26723d6e85ed3cf8e71e74837c97777c0 --- /dev/null +++ b/models/zaq/vocab.txt @@ -0,0 +1,47 @@ +| +a +n +i +e +u +t +l +b +c +s +h +r +g +d +o +y +ꞌ +x +á +q +í +̱ +‐ +ú +m +p +j +ä +ü +- +é +f +ó +v +z +k +β +' +ο +ñ +ε +5 +ι +1 +8 + diff --git a/models/zar/G_100000.pth b/models/zar/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..0143e8519406c9e0d726c673d1bb900066b31dce --- /dev/null +++ b/models/zar/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21dcc18758a02d9fa05f5236ff5a1f02eeff7295e1c4c1b3a741520ba77cbd68 +size 145486069 diff --git a/models/zar/config.json b/models/zar/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zar/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zar/vocab.txt b/models/zar/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0aec0684dfb82d5068de94b1f08415c2c174e822 --- /dev/null +++ b/models/zar/vocab.txt @@ -0,0 +1,40 @@ +g +t + +z +á +ö +ú +q +o +ž +é +ë +i +d +j +p +h +ü +x +l +ʼ +- +e +n +ǘ +ñ +ó +f +_ +í +a +m +v +́ +b +c +s +y +u +r diff --git a/models/zas/G_100000.pth b/models/zas/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6a172f90f45ebd32c7738931c88b3f1c851a1073 --- /dev/null +++ b/models/zas/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:666d1fc49f0577acc0bcb8a1af0ffc3329ea74ac3030b00771bf2ec2aaca7109 +size 145484519 diff --git a/models/zas/config.json b/models/zas/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zas/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zas/vocab.txt b/models/zas/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..eec44bec15fd5cb497406995feacc9768d8734c7 --- /dev/null +++ b/models/zas/vocab.txt @@ -0,0 +1,38 @@ +| +a +i +n +e +r +d +l +b +j +g +u +o +x +c +m +t +s +ë +z +' +y +p +h +ü +é +w +̱ +q +ú +á +v +f +ó +í +k +ñ + diff --git a/models/zav/G_100000.pth b/models/zav/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c0ccaf726ff63c1b0843f1bf5bb0310860081f3 --- /dev/null +++ b/models/zav/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc5b252e87bfd3d4df748d141eb3371908de75152f725bb6098ebeaad2f4b7a +size 145485415 diff --git a/models/zav/config.json b/models/zav/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zav/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zav/vocab.txt b/models/zav/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..69669397316eb5657416df54c09a568b34611af5 --- /dev/null +++ b/models/zav/vocab.txt @@ -0,0 +1,39 @@ +̱ +š +e +p +ɟ +f +h +i +ḻ +j +z +x +d +m +y +o +ɉ +ə +ṉ +ü +k +g + +c +ž +_ +ñ +q +a +' +l +u +— +r +t +n +w +b +s diff --git a/models/zaw/G_100000.pth b/models/zaw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..4d0e7df3e1e90a14d3215f572bafa049377ddd3e --- /dev/null +++ b/models/zaw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:186c33aff13aca81f9b570396abe0dad7cefa2b1f491759956863b099f8fde15 +size 145489119 diff --git a/models/zaw/config.json b/models/zaw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zaw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zaw/vocab.txt b/models/zaw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ea38978d500d93a1e49a0979a9d6badc36b9b5b8 --- /dev/null +++ b/models/zaw/vocab.txt @@ -0,0 +1,44 @@ +| +i +a +n +e +u +j +r +c +d +l +s +x +t +g +o +b +ä +ꞌ +h +z +ṉ +p +ḻ +y +m +ú +w +q +é +f +- +ṟ +í +̲ +ó +́ +á +v +' +k +ñ +ü + diff --git a/models/zca/G_100000.pth b/models/zca/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..94481569e1e4aebfbde4686c249c8230cbf24568 --- /dev/null +++ b/models/zca/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc1367d0a9c82b219e35513a0de63a8ae07b6a78b3f76c3a511ca0e9aee25b4 +size 145486159 diff --git a/models/zca/config.json b/models/zca/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zca/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zca/vocab.txt b/models/zca/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..e904d26e59c6c0e7cd6fdab29ce21d3f85f6d010 --- /dev/null +++ b/models/zca/vocab.txt @@ -0,0 +1,40 @@ +| +a +n +ꞌ +i +o +e +l +r +k +z +h +m +x +b +s +u +g +t +d +ë +p +w +y +c +j +ú +ñ +á +é +f +ó +í +v +̱ +q +- +— +' + diff --git a/models/zga/G_100000.pth b/models/zga/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..9c919c81026290f2f6581661aa25014962965fca --- /dev/null +++ b/models/zga/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb36438b71edd10399eb26d448a009b9385ff339a8e81a9d47ae59e9fbdb0615 +size 145477717 diff --git a/models/zga/config.json b/models/zga/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zga/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zga/vocab.txt b/models/zga/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a3b1bb20d046af2b28c78750390eb62fd8cdd4ed --- /dev/null +++ b/models/zga/vocab.txt @@ -0,0 +1,29 @@ +g +u + +v +ʉ +p +k +f +m +_ +e +y +ʼ +s +a +z +j +n +d +- +o +l +w +t +i +ɨ +b +h +' diff --git a/models/zim/G_100000.pth b/models/zim/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1adb25edbd1d7118a5abab97593dc07eb2e171c9 --- /dev/null +++ b/models/zim/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6572d498b344472f63f502309061fca6cb6535c5b6bdcac74c66aca75034cf5 +size 145487595 diff --git a/models/zim/config.json b/models/zim/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zim/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zim/vocab.txt b/models/zim/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..375cb2c58b1d102a722c623352df9305ef8b1b4c --- /dev/null +++ b/models/zim/vocab.txt @@ -0,0 +1,42 @@ +v +ï +k +ù +ʼ +û +a +c +l +w +g +t +z +h +m +s +' +j +á +u +n +p +f +ô +- + +â +î +_ +ú +r +i +o +d +y +b +à +ê +e +— +ˮ +é diff --git a/models/ziw/G_100000.pth b/models/ziw/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..6987be695b38bcd13e7c40cc7ec7e0b7e70cabb2 --- /dev/null +++ b/models/ziw/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee965f6f3eb2ad0f319d869b70321cb30612fd3931c926f455f07fc22e4bb4b +size 145475409 diff --git a/models/ziw/config.json b/models/ziw/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ziw/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ziw/vocab.txt b/models/ziw/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..013c44bf3448611891a929859a703a89a82e2f69 --- /dev/null +++ b/models/ziw/vocab.txt @@ -0,0 +1,26 @@ +a +' +j +g +w +e +c +u + +t +_ +f +l +s +h +i +v +d +p +m +k +z +y +n +b +o diff --git a/models/zlm/G_100000.pth b/models/zlm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8dfd30aa55005ae29132d28f2d74026eb55d5af --- /dev/null +++ b/models/zlm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:942be892c6aaeac884941e04098c2ea31b69849a1166f358281753ee4fa72eda +size 145481579 diff --git a/models/zlm/config.json b/models/zlm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zlm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zlm/vocab.txt b/models/zlm/vocab.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5341cfc6ff6909a2ddd466155ceca187613838e --- /dev/null +++ b/models/zlm/vocab.txt @@ -0,0 +1,34 @@ +y +g + +f +e +t +o +5 +j +_ +3 +k +– +i +a +0 +n +6 +d +u +c +' +h +q +- +p +m +w +r +4 +s +l +b +z diff --git a/models/zmz/G_100000.pth b/models/zmz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..83437ffdf2a8a54b7d1286595d7c5c4de2ff54c3 --- /dev/null +++ b/models/zmz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c42929c5e1b1af19996cf528ebd74155ce7eb85eca1adab09f35eeee6ecf3d8a +size 145499907 diff --git a/models/zmz/config.json b/models/zmz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zmz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zmz/vocab.txt b/models/zmz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f8b3728749a2c2d2beef3086ab16f87e4e66770a --- /dev/null +++ b/models/zmz/vocab.txt @@ -0,0 +1,58 @@ +ú +j +ã +ʉ +à +k +ẽ +1 +n +g +o +v +3 +u +2 +e +d +ɔ +ù +7 +ì +p +6 +ò +œ +4 +- +í +ũ +_ +l +́ +i +t +r + +ó +s +õ +9 +0 +é +' +a +z +5 +m +w +8 +y +̀ +b +ĩ +̃ +h +f +è +á diff --git a/models/zne/G_100000.pth b/models/zne/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..12f35852c939dcec49052f58017532086e73de0c --- /dev/null +++ b/models/zne/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a001233fe917edfc5662c40c4a01e4ddd454ba874932d64ec7a32fb48f0d49a3 +size 145484547 diff --git a/models/zne/config.json b/models/zne/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zne/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zne/vocab.txt b/models/zne/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..41b6c316fc14bf9dcb28149c3f66a40cf9898f48 --- /dev/null +++ b/models/zne/vocab.txt @@ -0,0 +1,38 @@ +û +ĩ +k +b +z +o +ò + +p +r +d +f +ã +m +ä +0 +i +t +ô +y +s +g +e +4 +v +ũ +2 +u +õ +a +ẽ +n +â +_ +h +w +- +ì diff --git a/models/zos/G_100000.pth b/models/zos/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..e12a056cba580a1fd7b960a0746c66409c1252da --- /dev/null +++ b/models/zos/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067225578e9715358461f65d4440bba1b0b6a2cad16012955c9f070f287c2688 +size 145484511 diff --git a/models/zos/config.json b/models/zos/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zos/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zos/vocab.txt b/models/zos/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..3f471cfb66dc5076bc9d1b85cec2271fceb2f35b --- /dev/null +++ b/models/zos/vocab.txt @@ -0,0 +1,38 @@ +u +m +é +x +s +f +y +n +t +q +á +c +ö +e +' +a +d +ó +p +o +í +ü +v +b +ø +r +ú +ñ +h +j +z +i +_ +g + +k +ṉ +l diff --git a/models/zpc/G_100000.pth b/models/zpc/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c026466b3a26fcd10ea3ee2ff7ae18866c7e93e4 --- /dev/null +++ b/models/zpc/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72f43e1e2b8569d28f99bff67922f994a1c47d7f6c32eb543649924487cab0cc +size 145485309 diff --git a/models/zpc/config.json b/models/zpc/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpc/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpc/vocab.txt b/models/zpc/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..dae74619cb47fdf7c0242537db3558a8893ca3cd --- /dev/null +++ b/models/zpc/vocab.txt @@ -0,0 +1,39 @@ +n +y +r +ꞌ +c +s +p +t +u +l +o +k +m +v +̃ + +ó +ú +a +f +j +í +­ +x +á +q +_ +ë +b +g +é +ü +h +i +z +ñ +' +d +e diff --git a/models/zpg/G_100000.pth b/models/zpg/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..cfeb505cbe58f9aa73923c18df97227c8847b024 --- /dev/null +++ b/models/zpg/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861c4d7797179870c2b435a3195ff4ca6819c096967dd6c42815fb98b248eb51 +size 145490681 diff --git a/models/zpg/config.json b/models/zpg/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpg/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpg/vocab.txt b/models/zpg/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0979f8c4fdf420d72cce8d18419f36445693f038 --- /dev/null +++ b/models/zpg/vocab.txt @@ -0,0 +1,46 @@ +| +e +i +a +n +o +d +y +l +u +z +m +r +g +h +ñ +t +ꞌ +b +c +x +s +p +â +- +w +ü +q +î +ó +é +ḻ +j +‐ +ú +ṉ +í +ṟ +f +á +v +ê +ô +û +k + diff --git a/models/zpi/G_100000.pth b/models/zpi/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..27c3b6e0f174a5ee7c9252d0399c118758f2a18a --- /dev/null +++ b/models/zpi/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e8cae31bbd10929451fcc7dda002165bd984d7fb031b3b16d727583a88a5fc +size 145483773 diff --git a/models/zpi/config.json b/models/zpi/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpi/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpi/vocab.txt b/models/zpi/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..fc6a49a0fe9857f6d0900b7395084c2916299cd2 --- /dev/null +++ b/models/zpi/vocab.txt @@ -0,0 +1,37 @@ +| +e +n +a +o +l +d +r +y +i +u +m +t +g +z +k +x +s +w +b +p +h +ë +c +é +j +- +f +á +í +ó +v +́ +q +ú +ñ + diff --git a/models/zpl/G_100000.pth b/models/zpl/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..ffa1f9bc44c38b85bd406d7277e5e58ee8748b9f --- /dev/null +++ b/models/zpl/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f24cc275a0fae5cc32f8d70fc5f111fb806f6e00568b168207469840a8ee3f +size 145482235 diff --git a/models/zpl/config.json b/models/zpl/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpl/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpl/vocab.txt b/models/zpl/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a3439fc65d1a887fdbb3278f8cbac447bc958500 --- /dev/null +++ b/models/zpl/vocab.txt @@ -0,0 +1,35 @@ +a +c +l +ñ +ü +i +r +ó +e +j +o +b +u +p +n +g +h +v +k +t +x +í +d +' +y +q +_ +é + +á +m +ú +z +s +f diff --git a/models/zpm/G_100000.pth b/models/zpm/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..7c24c7a06af06e6405db9da7177c7bc02abfb477 --- /dev/null +++ b/models/zpm/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b52bf6c00d9fb487251a838c8b6926970bbfc739f9fab0af25e03a227977899 +size 145486847 diff --git a/models/zpm/config.json b/models/zpm/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpm/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpm/vocab.txt b/models/zpm/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..9801f43200634e9bf1e60534533bc620f4359222 --- /dev/null +++ b/models/zpm/vocab.txt @@ -0,0 +1,41 @@ +w +u +f +j +' +e +é +ü +_ +p +o +á +b +ó +m +t +ñ +k +l +‐ +v +q +n +ṉ +d +y +r +í +ḻ +x +ú +- +ë +i +s +h +a +g +z + +c diff --git a/models/zpo/G_100000.pth b/models/zpo/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..c050670b7198eb66e49a6e56e7feaba0e7fc010b --- /dev/null +++ b/models/zpo/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6ba9c77c2de6e9b5ab8c436c2208b809b4252cb3b28074e2ef82d58e3eacc1 +size 145491457 diff --git a/models/zpo/config.json b/models/zpo/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpo/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpo/vocab.txt b/models/zpo/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..f65f11f4ed1a6154cfbcd18684d278a449170b20 --- /dev/null +++ b/models/zpo/vocab.txt @@ -0,0 +1,47 @@ +- + +_ +3 +c +t +n +i +a +ú +7 +q +é +8 +m +2 +á +9 +v +k +y +u +r +6 +e +l +' +x +j +ñ +f +h +b +s +í +ó +p +g +o +— +4 +w +z +1 +5 +0 +d diff --git a/models/zpt/G_100000.pth b/models/zpt/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..544fd1fbb6620559d160a7d599de176f16e55561 --- /dev/null +++ b/models/zpt/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c75bcdda99439308fcbc01e4f93c5fdf17e53baa9e00a96761887233c67da7b1 +size 145484529 diff --git a/models/zpt/config.json b/models/zpt/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpt/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpt/vocab.txt b/models/zpt/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..380db342639a6bdbd8733796d6b14c6454140100 --- /dev/null +++ b/models/zpt/vocab.txt @@ -0,0 +1,38 @@ +ñ +c +k + +ó +ꞌ +u +w +ú +i +' +t +f +j +q +b +í +d +_ +r +y +e +á +n +h +m +l +‐ +a +o +p +v +s +g +z +— +é +x diff --git a/models/zpu/G_100000.pth b/models/zpu/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d42b891d559bb88506bd6d6d4453efe318454453 --- /dev/null +++ b/models/zpu/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40f6c30e681a57f03297a1c4ce2492bc564390ccc38d2431a23355ec128ffc8 +size 145483783 diff --git a/models/zpu/config.json b/models/zpu/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpu/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpu/vocab.txt b/models/zpu/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..a19e3b62e5202b8fdb52ec90370b2148b322a0c3 --- /dev/null +++ b/models/zpu/vocab.txt @@ -0,0 +1,37 @@ +a +g +- +á +b +ñ +i +í +t + +f +' +l +— +d +z +p +s +j +ú +x +n +w +q +r +u +_ +v +é +h +e +k +o +c +m +ó +y diff --git a/models/zpz/G_100000.pth b/models/zpz/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..76a6238cef0b7475eecaa1760fb92085e334c2a9 --- /dev/null +++ b/models/zpz/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f52d72e75e9f30ae577025f0e5efdcc00bc26f9adb25b35c0d2a90fd4344ce +size 145483739 diff --git a/models/zpz/config.json b/models/zpz/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zpz/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zpz/vocab.txt b/models/zpz/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..ef859c0afee46ee143674aa96a2904b892a7c9ad --- /dev/null +++ b/models/zpz/vocab.txt @@ -0,0 +1,37 @@ +ñ +z +s +é +e +g + +t +m +o +ü +r +j +q +k +a +i +' +b +c +̱ +l +_ +p +h +á +v +w +ú +y +ó +í +f +u +d +n +x diff --git a/models/ztq/G_100000.pth b/models/ztq/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..5e84569891206499a2f5e71af704f25d4a553513 --- /dev/null +++ b/models/ztq/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a28f72e952cdad65b8eda8ff61e205d80c9d5fd63748af65342e1c5821343f93 +size 145482975 diff --git a/models/ztq/config.json b/models/ztq/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/ztq/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/ztq/vocab.txt b/models/ztq/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..4c97efbb961d859c07e2dad71ecc8d555d8a487e --- /dev/null +++ b/models/ztq/vocab.txt @@ -0,0 +1,36 @@ +ó +ë +f +í +z +x +p + +ú +k +n +m +— +w +s +a +j +u +v +_ +e +g +d +t +ñ +o +q +l +y +r +b +á +é +c +i +h diff --git a/models/zty/G_100000.pth b/models/zty/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..1bf7a8d16b3895c04b693aacc0468752feb2be61 --- /dev/null +++ b/models/zty/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e4611ffad09bfe16f105e7e32640fc33883f7a3db027ecc6f1fd6d101426a77 +size 145485275 diff --git a/models/zty/config.json b/models/zty/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zty/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zty/vocab.txt b/models/zty/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..08872d2be1837caeba18d584cebc7af7f0a58401 --- /dev/null +++ b/models/zty/vocab.txt @@ -0,0 +1,39 @@ +í +̱ + +é +á +ẕ +ó +d +i +c +u +b +p +n +t +k +m +g +o +ḻ +l +r +- +_ +' +h +w +x +e +z +ž +j +ú +a +y +f +s +ñ +— diff --git a/models/zyb/G_100000.pth b/models/zyb/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..bdfaa19b77906c3eb82602a1c9357a8f14ee8e36 --- /dev/null +++ b/models/zyb/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb7c3fd3cd53235c42d5afcc57ec37e24d0d7a7e6e4b72a1b68b8d6a4c2ec74 +size 145479137 diff --git a/models/zyb/config.json b/models/zyb/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zyb/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zyb/vocab.txt b/models/zyb/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..994a74036a428e1f7461534c7b3206f827d26438 --- /dev/null +++ b/models/zyb/vocab.txt @@ -0,0 +1,31 @@ +| +n +e +a +g +i +h +u +o +j +d +z +q +w +m +c +b +s +y +x +l +v +r +f +k +t +' +p +— +- + diff --git a/models/zyp/G_100000.pth b/models/zyp/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..b32b69ce6c3a1dab50fca8e59f0c3a6895a7b001 --- /dev/null +++ b/models/zyp/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed6e5664a2619f364575ce4a8b010c893063d352ae1212ef4ee85a968e3e8624 +size 145486821 diff --git a/models/zyp/config.json b/models/zyp/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zyp/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zyp/vocab.txt b/models/zyp/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..cb3750662cc5f1dc4d75ace43243ba998b8af055 --- /dev/null +++ b/models/zyp/vocab.txt @@ -0,0 +1,41 @@ +| +a +h +n +i +t +e +g +w +c +l +k +u +p +o +s +m +r +y +z +b +v +d +á +j +f +- +0 +é +1 +2 +4 +í +5 +6 +ú +3 +7 +ó +' + diff --git a/models/zza/G_100000.pth b/models/zza/G_100000.pth new file mode 100644 index 0000000000000000000000000000000000000000..d49d741ae2fb2bb36dbde2df68f2ca1c831a187a --- /dev/null +++ b/models/zza/G_100000.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3523e6175d23833591eb3d0a1e4bca790efee8f973d49c41842e41e0d2de0c47 +size 145482331 diff --git a/models/zza/config.json b/models/zza/config.json new file mode 100644 index 0000000000000000000000000000000000000000..183f6cf4aa2db779aed3ddadc188bbb02e860690 --- /dev/null +++ b/models/zza/config.json @@ -0,0 +1,87 @@ +{ + "train": { + "log_interval": 200, + "eval_interval": 1000, + "seed": 1234, + "epochs": 20000, + "learning_rate": 0.0002, + "betas": [ + 0.8, + 0.99 + ], + "eps": 1e-09, + "batch_size": 64, + "fp16_run": true, + "lr_decay": 0.999875, + "segment_size": 8192, + "init_lr_ratio": 1, + "warmup_epochs": 0, + "c_mel": 45, + "c_kl": 1.0 + }, + "data": { + "training_files": "train.ltr", + "validation_files": "dev.ltr", + "text_cleaners": [ + "transliteration_cleaners" + ], + "max_wav_value": 32768.0, + "sampling_rate": 16000, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_mel_channels": 80, + "mel_fmin": 0.0, + "mel_fmax": null, + "add_blank": true, + "n_speakers": 0, + "cleaned_text": true + }, + "model": { + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + "resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "n_layers_q": 3, + "use_spectral_norm": false + } +} \ No newline at end of file diff --git a/models/zza/vocab.txt b/models/zza/vocab.txt new file mode 100755 index 0000000000000000000000000000000000000000..0fde70484b16f7364d5272cbdaa7b7d603382565 --- /dev/null +++ b/models/zza/vocab.txt @@ -0,0 +1,35 @@ +| +a +e +n +i +r +ê +î +y +o +d +m +k +w +b +t +s +h +l +ş +z +v +x +j +u +q +g +p +c +ç +û +f +' +- +