Spaces:

artificialguybr
/

video-dubbing

Running on Zero

App Files Community

artificialguybr committed on Jul 5

Commit

23be978

•

1 Parent(s): a47bd89

Update app.py

Browse files

Files changed (1) hide show

app.py +90 -84

app.py CHANGED Viewed

@@ -53,97 +53,102 @@ def check_for_faces(video_path):
 @spaces.GPU
 def process_video(radio, video, target_language, has_closeup_face):
-    if target_language is None:
-        return gr.Error("Please select a Target Language for Dubbing.")
-    run_uuid = uuid.uuid4().hex[:6]
-    output_filename = f"{run_uuid}_resized_video.mp4"
-    ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()
-    video_path = output_filename
-    if not os.path.exists(video_path):
-        return f"Error: {video_path} does not exist."
-    video_info = ffmpeg.probe(video_path)
-    video_duration = float(video_info['streams'][0]['duration'])
-    if video_duration > 60:
-        os.remove(video_path)
-        return gr.Error("Video duration exceeds 1 minute. Please upload a shorter video.")
-    ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
-    shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
-    subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
-    print("Attempting to transcribe with Whisper...")
-    try:
-        segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
-        whisper_text = " ".join(segment.text for segment in segments)
-        whisper_language = info.language
-        print(f"Transcription successful: {whisper_text}")
-    except RuntimeError as e:
-        print(f"RuntimeError encountered: {str(e)}")
-        if "CUDA failed with error device-side assert triggered" in str(e):
-            gr.Warning("Error. Space need to restart. Please retry in a minute")
-            api.restart_space(repo_id=repo_id)
-    language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
-    target_language_code = language_mapping[target_language]
-    translator = Translator()
-    translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
-    print(translated_text)
-    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
-    tts.to('cuda')
-    tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
-    pad_top = 0
-    pad_bottom = 15
-    pad_left = 0
-    pad_right = 0
-    rescaleFactor = 1
-    video_path_fix = video_path
-    if has_closeup_face:
-        has_face = True
-    else:
-        has_face = check_for_faces(video_path)
-    if has_closeup_face:
-        try:
-            cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
-            subprocess.run(cmd, shell=True, check=True)
-        except subprocess.CalledProcessError as e:
-            if "Face not detected! Ensure the video contains a face in all the frames." in str(e.stderr):
-                gr.Warning("Wav2lip didn't detect a face. Please try again with the option disabled.")
-                cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
-                subprocess.run(cmd, shell=True)
-    else:
-        cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
-        subprocess.run(cmd, shell=True)
-    if not os.path.exists(f"{run_uuid}_output_video.mp4"):
-        raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
-    output_video_path = f"{run_uuid}_output_video.mp4"
-    files_to_delete = [
-        f"{run_uuid}_resized_video.mp4",
-        f"{run_uuid}_output_audio.wav",
-        f"{run_uuid}_output_audio_final.wav",
-        f"{run_uuid}_output_synth.wav"
-    ]
-    for file in files_to_delete:
         try:
-            os.remove(file)
-        except FileNotFoundError:
-            print(f"File {file} not found for deletion.")
-    return output_video_path
 def swap(radio):
     if(radio == "Upload"):
         return gr.update(source="upload")
@@ -163,11 +168,12 @@ iface = gr.Interface(
                     value=False,
                     info="Say if video have close-up face. For Wav2lip. Will not work if checked wrongly.")
     ],
-    outputs=gr.Video(),
     live=False,
     title="AI Video Dubbing",
     description="""This tool was developed by [@artificialguybr](https://twitter.com/artificialguybr) using entirely open-source tools. Special thanks to Hugging Face for the GPU support. Thanks [@yeswondwer](https://twitter.com/@yeswondwerr) for original code. Test the [Video Transcription and Translate](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) space!""",
     allow_flagging=False
 )
 with gr.Blocks() as demo:
     iface.render()

 @spaces.GPU
 def process_video(radio, video, target_language, has_closeup_face):
+    try:
+        if target_language is None:
+            raise ValueError("Please select a Target Language for Dubbing.")
+        run_uuid = uuid.uuid4().hex[:6]
+        output_filename = f"{run_uuid}_resized_video.mp4"
+        ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()
+        video_path = output_filename
+        if not os.path.exists(video_path):
+            raise FileNotFoundError(f"Error: {video_path} does not exist.")
+        video_info = ffmpeg.probe(video_path)
+        video_duration = float(video_info['streams'][0]['duration'])
+        if video_duration > 60:
+            os.remove(video_path)
+            raise ValueError("Video duration exceeds 1 minute. Please upload a shorter video.")
+        ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
+        shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
+        subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
+        print("Attempting to transcribe with Whisper...")
         try:
+            segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
+            whisper_text = " ".join(segment.text for segment in segments)
+            whisper_language = info.language
+            print(f"Transcription successful: {whisper_text}")
+        except RuntimeError as e:
+            print(f"RuntimeError encountered: {str(e)}")
+            if "CUDA failed with error device-side assert triggered" in str(e):
+                gr.Warning("Error. Space need to restart. Please retry in a minute")
+                api.restart_space(repo_id=repo_id)
+        language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
+        target_language_code = language_mapping[target_language]
+        translator = Translator()
+        translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
+        print(translated_text)
+        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
+        tts.to('cuda')
+        tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
+        pad_top = 0
+        pad_bottom = 15
+        pad_left = 0
+        pad_right = 0
+        rescaleFactor = 1
+        video_path_fix = video_path
+        if has_closeup_face:
+            has_face = True
+        else:
+            has_face = check_for_faces(video_path)
+        if has_closeup_face:
+            try:
+                cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
+                subprocess.run(cmd, shell=True, check=True)
+            except subprocess.CalledProcessError as e:
+                if "Face not detected! Ensure the video contains a face in all the frames." in str(e.stderr):
+                    gr.Warning("Wav2lip didn't detect a face. Please try again with the option disabled.")
+                    cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
+                    subprocess.run(cmd, shell=True)
+        else:
+            cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
+            subprocess.run(cmd, shell=True)
+        if not os.path.exists(f"{run_uuid}_output_video.mp4"):
+            raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
+        output_video_path = f"{run_uuid}_output_video.mp4"
+        files_to_delete = [
+            f"{run_uuid}_resized_video.mp4",
+            f"{run_uuid}_output_audio.wav",
+            f"{run_uuid}_output_audio_final.wav",
+            f"{run_uuid}_output_synth.wav"
+        ]
+        for file in files_to_delete:
+            try:
+                os.remove(file)
+            except FileNotFoundError:
+                print(f"File {file} not found for deletion.")
+        return output_video_path
+    except Exception as e:
+        print(f"Error in process_video: {str(e)}")
+        return gr.update(value=None, visible=True), f"Error: {str(e)}"
 def swap(radio):
     if(radio == "Upload"):
         return gr.update(source="upload")
                     value=False,
                     info="Say if video have close-up face. For Wav2lip. Will not work if checked wrongly.")
     ],
+    outputs=[gr.Video(), gr.Textbox(label="Error Message")],
     live=False,
     title="AI Video Dubbing",
     description="""This tool was developed by [@artificialguybr](https://twitter.com/artificialguybr) using entirely open-source tools. Special thanks to Hugging Face for the GPU support. Thanks [@yeswondwer](https://twitter.com/@yeswondwerr) for original code. Test the [Video Transcription and Translate](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) space!""",
     allow_flagging=False
 )
 with gr.Blocks() as demo:
     iface.render()