[SETTINGS]

# Set to True if you don't want to translate the subtitles. If so, ignore the language variables
skip_translation = True

# Set to True if you don't want to synthesize the audio. For example, if you have already done so and are just testing
skip_synthesize = True

# Set to True if you want to stop the program after translating the subtitles.
# For example, if you want to manually review the resulting subtitles before synthesizing the audio.
# Note that to resume the process, you must set this to False again and set skip_translation to True
stop_after_translation = False
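# Example workflow: do one run with stop_after_translation = True and review the translated subtitle files,
# then do a second run with stop_after_translation = False and skip_translation = True to synthesize from them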

# The BCP-47 language code for the original text language
original_language = pt-BR
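# Other example codes: en-US (English, United States), es-ES (Spanish, Spain), ja-JP (Japanese, Japan)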

# Applies to DeepL translations only: whether to use more or less formal language
# Possible Values:  default  |  more  |  less
formality_preference = default


# The format/codec of the final audio file
# Possible Values:  mp3  |  aac  |  wav
output_format = aac


# Must be a codec from the 'Supported Audio Encodings' section here: https://cloud.google.com/speech-to-text/docs/encoding#audio-encodings
# This determines the codec returned by the API, not the one produced by the program! You probably shouldn't change this; other values might not work
synth_audio_encoding = MP3


# Enter the native sample rate for the voice audio provided by the TTS service
# This is usually 24 kHz (24000), but some services like Azure offer higher quality audio at 48 kHz (48000)
# Enter digits only, with no commas or other separators
synth_sample_rate = 24000


# This will drastically improve the quality of the final result, BUT see the note below
# Note! When this is True, instead of just stretching the audio clips, the program will have the API generate new audio clips with adjusted speaking rates
# This can't be done on the first pass because we don't know how long the audio clips will be until we generate them
two_pass_voice_synth = True
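# Illustrative example (hypothetical numbers): if the first pass returns a 2.0 second clip for a
# 1.6 second subtitle slot, the second pass requests the same text at a speaking rate of
# 2.0 / 1.6 = 1.25, so the new clip comes back close to the target length without stretching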


# On the second pass, each audio clip will be extremely close to the desired length, but still slightly off
# Set this to True if you want to stretch the second-pass clips anyway to be exact, down to the millisecond
# However, this will degrade the voice quality and make it sound similar to a single pass
force_stretch_with_twopass = False


# Azure Only: Sets the exact pause, in milliseconds, that the TTS voice will take after a period between sentences
# Set it to "default" to keep the service's default, which is quite slow. I find 80ms works well
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
# Possible values:  default  |  Any integer
azure_sentence_pause = 80
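# For reference, a pause like this maps onto Azure's SSML silence element, roughly of the form
# <mstts:silence type="Sentenceboundary" value="80ms"/> (a sketch; exact usage is per Azure's SSML docs)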

# Azure Only: Sets the exact pause, in milliseconds, that the TTS voice will take after a comma.
# Set it to "default" to keep the service's default, which is quite slow.
# It doesn't seem to follow this number exactly, and appears to have a minimum of around 50ms
# Note: Changing this from default adds about 60 characters per line to the total Azure character usage count
# Possible values:  default  |  Any integer
azure_comma_pause = 50
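# A rough sketch of the equivalent SSML (the exact silence type name here is an assumption; check Azure's docs):
# <mstts:silence type="Comma-exact" value="50ms"/>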


# Adds a silence buffer before and after each spoken clip, but keeps the speech "centered" at the right spot so it's still synced
#   >  To be clear, the total length of the audio file will remain the same; each spoken clip is shrunk within it
# Useful if your subtitles file butts all the beginning and end timings right up against each other
# Note: this applies both before and after each clip, so the total extra space between clips will be 2x this value
# Warning: setting this too high could result in the TTS speaking extremely fast to fit into the remaining clip duration
#   >  Around 25 - 50 milliseconds is a good starting point
add_line_buffer_milliseconds = 150
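# Worked example (hypothetical timings): with a 150ms buffer, a subtitle slot running from
# 1000ms to 2000ms still occupies the full 1000ms in the output, but the speech is fit into
# 1150ms - 1850ms (700ms) and padded with silence on both sides, keeping it centered and synced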


# If the combined character count of two adjacent subtitle lines is below this amount, and one starts at the same time the other ends, the lines will be combined
# This should improve the speech synthesis by reducing unnatural splits in spoken sentences.
# Setting this to zero or a low number will effectively disable it
combine_subtitles_max_chars = 200
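# Worked example (hypothetical lines): a 90-character line ending at 00:00:05,000 and a
# 70-character line starting at 00:00:05,000 total 160 characters, under the 200 limit,
# so they are combined and synthesized as one continuous clip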


# Mostly prevents the program from deleting files in the working directory, and also generates files for each audio step
debug_mode = False