RafaG committed on
Commit
be52395
1 Parent(s): 1c6d76f

Upload 3 files

Files changed (3)
  1. batch.ini +124 -0
  2. cloud_service_settings.ini +36 -0
  3. config.ini +80 -0
batch.ini ADDED
@@ -0,0 +1,124 @@
+ # You can add as many sections for languages as you need. Just increment the number in the [LANGUAGE-#] header for each one
+ # Each section must contain the synth language code, synth voice name, and translation target language
+ # A value for synth_voice_gender only matters if the service requires it, like Google TTS, but the setting must be there even if not used
+
+ [SETTINGS]
+
+ # Enter the language numbers you wish to process when the batch file is read.
+ # This is also useful for keeping multiple presets of a single language - you can enable just one of them at a time
+ # Enter the numbers separated by commas: for example, to use LANGUAGE-1 and LANGUAGE-2, put "1,2"
+ # Please note that supported languages and their codes vary by service. See the supported languages for each service in the README
+ enabled_languages = 5
+
+ # You can use a full file path, or the name of the file if it's in the same directory
+ # The video file name will also be used to name the final output audio file
+ original_video_file_path = video.mp4
+ srt_file_path = subtitles.srt
+
+
+ [LANGUAGE-0]
+ # English / Testing
+ # Remember, the translation target code may differ from synth_language_code because it is used by the translation service, not the TTS service
+ # See the codes here: https://cloud.google.com/translate/docs/languages / https://www.deepl.com/docs-api/translating-text/request/
+ translation_target_language = en
+ # The language of the synthesized voice. Not necessarily the same as the translation_target_language variable above
+ # Examples - English: en-US | Spanish: es-US or es-MX
+ synth_language_code = en-US
+ # Choose one from here so it matches the target language: https://cloud.google.com/text-to-speech/docs/voices / https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
+ # Examples - English: en-US-Neural2-I (Google) or en-US-JasonNeural (Azure) | Spanish: es-US-Neural2-B (Google) or es-MX-CecilioNeural (Azure)
+ synth_voice_name = en-US-JasonNeural
+ # Google Only: Should match the gender of the chosen voice above
+ synth_voice_gender = MALE
+
+
+ [LANGUAGE-1]
+ # Spanish
+ translation_target_language = es
+ synth_language_code = es-MX
+ synth_voice_name = es-MX-CecilioNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-2]
+ # Hindi
+ translation_target_language = hi
+ synth_language_code = hi-IN
+ synth_voice_name = hi-IN-MadhurNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-3]
+ # Arabic
+ translation_target_language = ar
+ synth_language_code = ar-EG
+ synth_voice_name = ar-EG-ShakirNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-4]
+ # Russian
+ translation_target_language = ru
+ synth_language_code = ru-RU
+ synth_voice_name = ru-RU-DmitryNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-5]
+ # Portuguese
+ translation_target_language = pt
+ synth_language_code = pt-BR
+ synth_voice_name = pt-BR-FabioNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-6]
+ # Italian
+ translation_target_language = it
+ synth_language_code = it-IT
+ synth_voice_name = it-IT-DiegoNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-7]
+ # Indonesian
+ translation_target_language = id
+ synth_language_code = id-ID
+ synth_voice_name = id-ID-ArdiNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-8]
+ # Japanese
+ translation_target_language = ja
+ synth_language_code = ja-JP
+ synth_voice_name = ja-JP-NaokiNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-9]
+ # Korean
+ translation_target_language = ko
+ synth_language_code = ko-KR
+ synth_voice_name = ko-KR-BongJinNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-10]
+ # German
+ translation_target_language = de
+ synth_language_code = de-DE
+ synth_voice_name = de-DE-KasperNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-11]
+ # Chinese (Mandarin, Simplified)
+ translation_target_language = zh
+ synth_language_code = zh-CN
+ synth_voice_name = zh-CN-YunyeNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-12]
+ # Turkish
+ translation_target_language = tr
+ synth_language_code = tr-TR
+ synth_voice_name = tr-TR-AhmetNeural
+ synth_voice_gender = MALE
+
+ [LANGUAGE-13]
+ #
+ translation_target_language =
+ synth_language_code =
+ synth_voice_name =
+ synth_voice_gender =
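For illustration only, here is a minimal sketch of how a program might consume batch.ini, assuming it is read with Python's standard configparser; the section and key names come from the file above, and everything else (including that the project actually loads it this way) is an assumption.

# Illustrative sketch only: assumes batch.ini is read with Python's configparser;
# the project's actual loader may differ.
import configparser

config = configparser.ConfigParser()
config.read("batch.ini")

# enabled_languages is a comma-separated list of [LANGUAGE-#] numbers, e.g. "1,2"
enabled = [n.strip() for n in config["SETTINGS"]["enabled_languages"].split(",")]

for num in enabled:
    section = config[f"LANGUAGE-{num}"]
    print(
        section["translation_target_language"],
        section["synth_language_code"],
        section["synth_voice_name"],
        section["synth_voice_gender"],
    )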
cloud_service_settings.ini ADDED
@@ -0,0 +1,36 @@
+ [CLOUD]
+ # Which TTS service will you use?
+ # Possible Values: azure / google
+ tts_service = azure
+
+
+ # Which translation service will you use? DeepL is slower but more accurate
+ # Possible Values: google / deepl
+ # Note: If you will be skipping translation, this doesn't matter
+ translate_service = google
+
+ # If the translation language is not supported by DeepL, use Google Translate as a fallback
+ # Ignored if translate_service is set to google
+ use_fallback_google_translate = True
+
+
+ # The project name / project ID in the Google Cloud console. Required for translating
+ google_project_id = your-project-name
+
+
+ # API key for your DeepL account. Required for translating if translate_service = deepl
+ deepl_api_key = yourkeyxxxxxx
+
+
+ # API key for your Speech resource in Azure (Cognitive Services Speech)
+ azure_speech_key = 9d05b045bd8e4477acfb9b9dd58be65c
+
+
+ # The location/region of the Speech resource. This is listed on the same page as the API keys.
+ # Example: eastus
+ azure_speech_region = brazilsouth
+
+
+ # Sends requests to the TTS service to create multiple audio clips simultaneously. MUCH faster.
+ # Currently only supported when using azure
+ batch_tts_synthesize = False
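Again purely as a hedged sketch (not the project's actual code), this is one way the service-selection keys in cloud_service_settings.ini might be read and branched on with configparser.

# Illustrative sketch only: shows how the keys in cloud_service_settings.ini
# might be consumed; not the project's actual implementation.
import configparser

cloud = configparser.ConfigParser()
cloud.read("cloud_service_settings.ini")
settings = cloud["CLOUD"]

tts_service = settings["tts_service"].lower()               # "azure" or "google"
translate_service = settings["translate_service"].lower()   # "google" or "deepl"
use_fallback = settings.getboolean("use_fallback_google_translate")
batch_synth = settings.getboolean("batch_tts_synthesize")

if tts_service == "azure":
    speech_key = settings["azure_speech_key"]
    speech_region = settings["azure_speech_region"]
elif tts_service == "google":
    project_id = settings["google_project_id"]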
config.ini ADDED
@@ -0,0 +1,80 @@
+ [SETTINGS]
+
+ # Set to True if you don't want to translate the subtitles. If so, ignore the language variables
+ skip_translation = True
+
+ # Set to True if you don't want to synthesize the audio. For example, if you already did that and are testing
+ skip_synthesize = True
+
+ # Set to True if you want to stop the program after translating the subtitles.
+ # For example, if you want to manually review the resulting subtitles before synthesizing the audio.
+ # Note that to resume the process, you must set this back to False and set skip_translation to True
+ stop_after_translation = False
+
+ # The BCP-47 language code for the original text language
+ original_language = pt-BR
+
+ # Applies to DeepL translations only - whether to use more or less formal language
+ # Possible Values: default | more | less
+ formality_preference = default
+
+
+ # The format/codec of the final audio file
+ # Possible Values: mp3 | aac | wav
+ output_format = aac
+
+
+ # Must be a codec from the 'Supported Audio Encodings' section here: https://cloud.google.com/speech-to-text/docs/encoding#audio-encodings
+ # This determines the codec returned by the API, not the one produced by the program! You probably shouldn't change this; it might not work otherwise
+ synth_audio_encoding = MP3
+
+
+ # Enter the native sample rate for the voice audio provided by the TTS service
+ # This is usually 24 kHz (24000), but some services like Azure offer higher quality audio at 48 kHz (48000)
+ # Enter digits only, no commas or anything else
+ synth_sample_rate = 24000
+
+
+ # This will drastically improve the quality of the final result, BUT see the note below
+ # Note: instead of just stretching the audio clips, this has the API generate new audio clips with adjusted speaking rates
+ # This can't be done on the first pass because we don't know how long the audio clips will be until we generate them
+ two_pass_voice_synth = True
+
+
+ # On the second pass, each audio clip will be extremely close to the desired length, but a bit off
+ # Set this to True if you want to stretch the second-pass clip anyway to be exact, down to the millisecond
+ # However, this will degrade the voice and make it sound similar to a single pass
+ force_stretch_with_twopass = False
+
+
+ # Azure Only: Sets the exact pause in milliseconds that the TTS voice will take after a period between sentences
+ # Set it to "default" to keep the default, which is quite slow. I find 80ms is pretty good
+ # Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
+ # Possible values: default | Any integer
+ azure_sentence_pause = 80
+
+ # Azure Only: Sets the exact pause in milliseconds that the TTS voice will take after a comma.
+ # Set it to "default" to keep the default, which is quite slow.
+ # It doesn't seem to follow this number exactly, and seems to have a minimum around 50ms
+ # Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
+ # Possible values: default | Any integer
+ azure_comma_pause = 50
+
+
+ # Adds a silence buffer between each spoken clip, but keeps the speech "centered" at the right spot so it stays synced
+ # > To be clear, the total length of the audio file remains the same; each spoken clip is shrunk within it
+ # Useful if your subtitles file butts all the beginning and end timings right up against each other
+ # Note: this applies both before and after each clip, so the total extra space between clips will be 2x this
+ # Warning: setting this too high could result in the TTS speaking extremely fast to fit into the remaining clip duration
+ # > Around 25 - 50 milliseconds is a good starting point
+ add_line_buffer_milliseconds = 0
+
+
+ # If the combined character count of two adjacent subtitle lines is below this amount, and one starts at the same time the other ends, the lines will be combined
+ # This should improve the speech synthesis by reducing unnatural splits in spoken sentences.
+ # Setting this to zero or a low number effectively disables it
+ combine_subtitles_max_chars = 200
+
+
+ # Mostly prevents the program from deleting files in the working directory, and also generates files for each audio step
+ debug_mode = False
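As a final illustrative sketch, here is one way the mixed-type values in config.ini might be parsed; the file is assumed to be read with configparser, and pause_setting is a hypothetical helper for the "default"-or-integer options - none of this is the project's actual implementation.

# Illustrative sketch only: parsing config.ini's booleans, integers,
# and "default"-or-integer options with the standard library.
import configparser

conf = configparser.ConfigParser()
conf.read("config.ini")
s = conf["SETTINGS"]

skip_translation = s.getboolean("skip_translation")
two_pass = s.getboolean("two_pass_voice_synth")
sample_rate = s.getint("synth_sample_rate")
buffer_ms = s.getint("add_line_buffer_milliseconds")

def pause_setting(value):
    # azure_sentence_pause / azure_comma_pause accept "default" or an integer (ms);
    # return None for "default", otherwise the pause length as an int.
    return None if value.strip().lower() == "default" else int(value)

sentence_pause = pause_setting(s["azure_sentence_pause"])  # e.g. 80 -> 80 ms
comma_pause = pause_setting(s["azure_comma_pause"])        # e.g. 50 -> 50 ms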