import soundfile
import pyrubberband
import configparser
import pathlib
import os
import io

from Scripts.shared_imports import *
import Scripts.TTS as TTS
from Scripts.utils import parseBool

from pydub import AudioSegment
from pydub.silence import detect_leading_silence
import langcodes


workingFolder = "workingFolder"

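# Trim leading and trailing silence from a TTS clip so only the spoken audio remains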
def trim_clip(inputSound):
    trim_leading_silence = lambda x: x[detect_leading_silence(x):]
    trim_trailing_silence = lambda x: trim_leading_silence(x.reverse()).reverse()
    strip_silence = lambda x: trim_trailing_silence(trim_leading_silence(x))

    strippedSound = strip_silence(inputSound)
    return strippedSound

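# Overlay a clip onto the canvas at the given start time (in milliseconds) and return the combined audio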
def insert_audio(canvas, audioToOverlay, startTimeMs):
    canvasCopy = canvas
    canvasCopy = canvasCopy.overlay(audioToOverlay, position=int(startTimeMs))
    return canvasCopy

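# Create a silent audio canvas spanning the full length of the output, at the configured sample rate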
def create_canvas(canvasDuration, frame_rate=int(config['synth_sample_rate'])):
    canvas = AudioSegment.silent(duration=canvasDuration, frame_rate=frame_rate)
    return canvas

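# Compare a trimmed clip's actual duration to the subtitle's allotted duration and store the resulting
# speed factor in subsDict (a value greater than 1 means the clip is too long and must be sped up)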
def get_speed_factor(subsDict, trimmedAudio, desiredDuration, num):
    virtualTempFile = AudioSegment.from_file(trimmedAudio, format="wav")
    rawDuration = virtualTempFile.duration_seconds
    trimmedAudio.seek(0)  # Rewind the in-memory file so it can be read again later

    desiredDuration = float(desiredDuration)
    speedFactor = (rawDuration*1000) / desiredDuration
    subsDict[num]['speed_factor'] = speedFactor
    return subsDict

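# Time-stretch a clip by the given speed factor using pyrubberband and return it as a pydub AudioSegment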
def stretch_audio(audioFileToStretch, speedFactor, num):
    virtualTempAudioFile = io.BytesIO()

    y, sampleRate = soundfile.read(audioFileToStretch)

    # rbargs must be a dictionary; the '--fine' flag selects rubberband's finer (R3) engine
    stretched_audio = pyrubberband.time_stretch(y, sampleRate, speedFactor, rbargs={'--fine': '--fine'})

    soundfile.write(virtualTempAudioFile, stretched_audio, sampleRate, format='wav')
    if config['debug_mode']:
        soundfile.write(os.path.join(workingFolder, f'{num}_s.wav'), stretched_audio, sampleRate)

    return AudioSegment.from_file(virtualTempAudioFile, format="wav")

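# Build the final dubbed audio track: trim each synthesized clip, calculate how much each must be sped up
# or slowed down to fit its subtitle timing, optionally re-synthesize at the adjusted speed (two-pass),
# stretch clips where needed, overlay them onto a silent canvas, and export the result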
def build_audio(subsDict, langDict, totalAudioLength, twoPassVoiceSynth=False):
    if cloudConfig['tts_service'] == 'azure':
        twoPassVoiceSynth = False  # The two-pass flow is not used with the Azure TTS service

    # In-memory (BytesIO) trimmed copies of each TTS clip, keyed by subtitle number
    virtualTrimmedFileDict = {}

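    # First pass: trim silence from each TTS clip and keep an in-memory copy of the result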
    for key, value in subsDict.items():
        filePathTrimmed = os.path.join(workingFolder, str(key)) + "_t.wav"
        subsDict[key]['TTS_FilePath_Trimmed'] = filePathTrimmed

        rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
        trimmedClip = trim_clip(rawClip)

        if config['debug_mode']:
            trimmedClip.export(filePathTrimmed, format="wav")

        tempTrimmedFile = io.BytesIO()
        trimmedClip.export(tempTrimmedFile, format="wav")
        virtualTrimmedFileDict[key] = tempTrimmedFile

        keyIndex = list(subsDict.keys()).index(key)
        print(f" Trimmed Audio: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

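    # Calculate each clip's speed factor (Azure clips are used as synthesized, so they are skipped here)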
    if cloudConfig['tts_service'] != 'azure':
        for key, value in subsDict.items():
            subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Calculated Speed Factor: {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

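    # Optional second pass: re-synthesize the audio with the speed factors applied, then re-trim the new clips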
    if twoPassVoiceSynth and cloudConfig['tts_service'] != 'azure':
        if cloudConfig['batch_tts_synthesize'] and cloudConfig['tts_service'] == 'azure':
            subsDict = TTS.synthesize_dictionary_batch(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)
        else:
            subsDict = TTS.synthesize_dictionary(subsDict, langDict, skipSynthesize=config['skip_synthesize'], secondPass=True)

        for key, value in subsDict.items():
            rawClip = AudioSegment.from_file(value['TTS_FilePath'], format="mp3", frame_rate=int(config['synth_sample_rate']))
            trimmedClip = trim_clip(rawClip)

            if config['debug_mode']:
                secondPassTrimmedFile = value['TTS_FilePath_Trimmed'][:-4] + "_p2_t.wav"
                trimmedClip.export(secondPassTrimmedFile, format="wav")

            trimmedClip.export(virtualTrimmedFileDict[key], format="wav")
            keyIndex = list(subsDict.keys()).index(key)
            print(f" Trimmed Audio (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
        print("\n")

        # Recalculate speed factors if the second-pass clips will still be force-stretched to exact length
        if config['force_stretch_with_twopass']:
            for key, value in subsDict.items():
                subsDict = get_speed_factor(subsDict, virtualTrimmedFileDict[key], value['duration_ms'], num=key)
                keyIndex = list(subsDict.keys()).index(key)
                print(f" Calculated Speed Factor (2nd Pass): {keyIndex + 1} of {len(subsDict)}", end="\r")
            print("\n")

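    # Create the silent canvas, then overlay each clip at its subtitle start time, stretching it to fit
    # its allotted duration unless that was already handled by two-pass synthesis (or by Azure)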
    canvas = create_canvas(totalAudioLength)

    for key, value in subsDict.items():
        if (not twoPassVoiceSynth or config['force_stretch_with_twopass']) and cloudConfig['tts_service'] != 'azure':
            stretchedClip = stretch_audio(virtualTrimmedFileDict[key], speedFactor=subsDict[key]['speed_factor'], num=key)
        else:
            stretchedClip = AudioSegment.from_file(virtualTrimmedFileDict[key], format="wav")
            virtualTrimmedFileDict[key].seek(0)  # Rewind the in-memory file after reading it

        canvas = insert_audio(canvas, stretchedClip, value['start_ms'])
        keyIndex = list(subsDict.keys()).index(key)
        print(f" Final Audio Processed: {keyIndex + 1} of {len(subsDict)}", end="\r")
    print("\n")

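    # Get the language name from the language code to include in the output file name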
    lang = langcodes.get(langDict['languageCode'])
    langName = langcodes.get(langDict['languageCode']).get(lang.to_alpha3()).display_name()

    # Name the output after the original video, or use 'debug' when running in debug mode without a video file
    if config['debug_mode'] and not os.path.isfile(ORIGINAL_VIDEO_PATH):
        outputFileName = "debug" + f" - {langName} - {langDict['languageCode']}."
    else:
        outputFileName = pathlib.Path(ORIGINAL_VIDEO_PATH).stem + f" - {langName} - {langDict['languageCode']}."

    outputFileName = os.path.join(OUTPUT_FOLDER, outputFileName)

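    # Append the extension and pick the pydub/ffmpeg format string for the chosen output format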
    outputFormat = config['output_format'].lower()
    if outputFormat == "mp3":
        outputFileName += "mp3"
        formatString = "mp3"
    elif outputFormat == "wav":
        outputFileName += "wav"
        formatString = "wav"
    elif outputFormat == "aac":
        # The file is named .aac, but ffmpeg expects 'adts' as the format name for raw AAC streams
        outputFileName += "aac"
        formatString = "adts"

    canvas = canvas.set_channels(2)  # Convert the canvas to stereo for the final export

    try:
        print("\nExporting audio file...")
        canvas.export(outputFileName, format=formatString, bitrate="192k")
    except Exception:
        outputFileName = outputFileName + ".bak"
        canvas.export(outputFileName, format=formatString, bitrate="192k")
        print("\nThere was an issue exporting the audio, it might be a permission error. The file was saved as a backup with the extension .bak")
        print("Try removing the .bak extension, then listen to the file to see if it worked.\n")
        input("Press Enter to exit...")

    return subsDict