File size: 3,908 Bytes
da0e3ab
73fd4c0
 
ed64e04
73fd4c0
 
 
 
 
 
 
 
 
cf7b168
7b3eb41
73fd4c0
2d49e86
 
 
 
 
73fd4c0
233c677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902b7eb
 
 
233c677
 
 
4eae89a
 
9462754
4eae89a
43edaa1
 
 
578e8ab
4eae89a
233c677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73fd4c0
 
 
 
 
 
 
 
 
 
 
 
4726977
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import tempfile
import gradio as gr
import subprocess
import os, stat
from googletrans import Translator
from TTS.api import TTS
import ffmpeg
import whisper
from scipy.signal import wiener
import soundfile as sf
from pydub import AudioSegment
import numpy as np
import librosa
from zipfile import ZipFile

# Flag read by Coqui TTS; presumably lets it skip its interactive
# terms-of-service prompt when a model is downloaded -- TODO confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

# Unpack the bundled ffmpeg archive into the current working directory.
# The context manager guarantees the archive handle is closed even if
# extraction fails part-way.
with ZipFile("ffmpeg.zip") as ffmpeg_archive:
    ffmpeg_archive.extractall()

# Add the owner-execute bit to the extracted 'ffmpeg' file while keeping
# its other mode bits unchanged.
st = os.stat('ffmpeg')
os.chmod('ffmpeg', st.st_mode | stat.S_IEXEC)

def process_video(video, high_quality, target_language):
    """Dub ``video`` into ``target_language`` and lip-sync the result.

    Pipeline, in order:

    1. Optionally rescale the video to a height of 720 with ffmpeg.
    2. Extract the audio track, denoise it (Wiener filter) and band-limit
       it to 100-3000 Hz.
    3. Transcribe the cleaned audio with Whisper ("base" model).
    4. Translate the transcript with googletrans.
    5. Synthesise the translated text with the XTTS v1 model, using the
       cleaned audio as the speaker reference.
    6. Run ``Wav2Lip/inference.py`` on the video and the synthesised audio.

    Every intermediate and output file is written under a fixed name in
    the current working directory, so concurrent calls overwrite each
    other's files.

    Args:
        video: Path of the input video file.
        high_quality: When truthy, the video is rescaled to 720 px height
            before any other processing.
        target_language: Display name of the dubbing language; must be a
            key of the language table defined in this function.

    Returns:
        ``"output_video.mp4"`` on success, otherwise a string starting with
        ``"Error:"`` that describes what went wrong.

    Raises:
        KeyError: If ``target_language`` is not a supported language name.
        subprocess.CalledProcessError: If the ffmpeg band-pass pass fails.

    Exceptions raised by ffmpeg-python, Whisper and TTS are not caught here
    and propagate to the caller.
    """
    if not video:
        # No upload: report it like the other failure paths instead of
        # crashing on os.path.exists(None).
        return "Error: no video was provided."

    language_mapping = {
        'English': 'en',
        'Spanish': 'es',
        'French': 'fr',
        'German': 'de',
        'Italian': 'it',
        'Portuguese': 'pt',
        'Polish': 'pl',
        'Turkish': 'tr',
        'Russian': 'ru',
        'Dutch': 'nl',
        'Czech': 'cs',
        'Arabic': 'ar',
        'Chinese (Simplified)': 'zh-cn',
    }
    # Resolve the language first so an unsupported choice fails before the
    # expensive audio/ML pipeline runs.
    target_language_code = language_mapping[target_language]

    output_filename = "resized_video.mp4"
    if high_quality:
        # overwrite_output=True: a file left by a previous request must not
        # make ffmpeg stop at its "overwrite?" prompt.
        ffmpeg.input(video).output(
            output_filename, vf='scale=-1:720'
        ).run(overwrite_output=True)
        video_path = output_filename
    else:
        video_path = video

    if not os.path.exists(video_path):
        return f"Error: {video_path} does not exist."

    # Extract the audio stream as 24-bit PCM at 48 kHz.
    ffmpeg.input(video_path).output(
        'output_audio.wav', acodec='pcm_s24le', ar=48000, map='a'
    ).run(overwrite_output=True)

    # Denoise with a Wiener filter.
    y, sr = sf.read("output_audio.wav")
    y = y.astype(np.float32)
    y_denoised = wiener(y)
    sf.write("output_audio_denoised.wav", y_denoised, sr)

    sound = AudioSegment.from_file("output_audio_denoised.wav", format="wav")
    sound = sound.apply_gain(0)  # 0 dB: level is left unchanged
    sound = sound.low_pass_filter(3000).high_pass_filter(100)
    sound.export("output_audio_processed.wav", format="wav")

    # Second band-pass pass, this time with ffmpeg's own filters.
    band_pass_cmd = [
        "ffmpeg", "-y",
        "-i", "output_audio_processed.wav",
        "-af", "lowpass=3000,highpass=100",
        "output_audio_final.wav",
    ]
    subprocess.run(band_pass_cmd, capture_output=False, text=True, check=True)

    model = whisper.load_model("base")
    result = model.transcribe("output_audio_final.wav")
    whisper_text = result["text"]
    whisper_language = result['language']

    print(whisper_text)

    translator = Translator()
    try:
        translated_text = translator.translate(
            whisper_text, src=whisper_language, dest=target_language_code
        ).text
        print(translated_text)
    except AttributeError:
        # On this failure the placeholder sentence below is what gets
        # synthesised as the dubbed speech.
        print("Failed to translate text. Likely an issue with token extraction in the Google Translate API.")
        translated_text = "Translation failed due to API issue."

    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
    tts.to('cuda')  # replaces the deprecated gpu=True argument
    tts.tts_to_file(
        translated_text,
        speaker_wav='output_audio_final.wav',
        file_path="output_synth.wav",
        language=target_language_code,
    )

    # Values passed to Wav2Lip's --pads (top, bottom, left, right) and
    # --resize_factor options.
    pad_top = 0
    pad_bottom = 15
    pad_left = 0
    pad_right = 0
    rescale_factor = 1

    # Remove any result left by an earlier request so the existence check
    # below reflects this run only.
    if os.path.exists("output_video.mp4"):
        os.remove("output_video.mp4")

    # Argument list with no shell: paths containing spaces or shell
    # metacharacters are passed through verbatim and need no quoting.
    wav2lip_cmd = [
        "python", "Wav2Lip/inference.py",
        "--checkpoint_path", "/Wav2Lip/checkpoints/wav2lip_gan.pth",
        "--face", video_path,
        "--audio", "output_synth.wav",
        "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right),
        "--resize_factor", str(rescale_factor),
        "--nosmooth",
        "--outfile", "output_video.mp4",
    ]
    # A non-zero exit is not raised here; failure is detected through the
    # missing output file.
    subprocess.run(wav2lip_cmd, check=False)

    if not os.path.exists("output_video.mp4"):
        return "Error: output_video.mp4 was not generated."

    return "output_video.mp4"

# Gradio front end: a video upload, a "High Quality" checkbox and a target
# language dropdown feed process_video; its return value is shown as a file.
# All components use the top-level gr.* classes, matching gr.Video(); the
# gr.inputs / gr.outputs namespaces are deprecated.
iface = gr.Interface(
    fn=process_video,
    inputs=[
        gr.Video(),
        gr.Checkbox(label="High Quality"),
        gr.Dropdown(
            choices=[
                "English",
                "Spanish",
                "French",
                "German",
                "Italian",
                "Portuguese",
                "Polish",
                "Turkish",
                "Russian",
                "Dutch",
                "Czech",
                "Arabic",
                "Chinese (Simplified)",
            ],
            label="Target Language for Dubbing",
        ),
    ],
    outputs=gr.File(),
    live=False,
)

iface.launch()