artificialguybr committed on
Commit
c9f8849
1 Parent(s): c39dfd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -53
app.py CHANGED
@@ -13,62 +13,63 @@ import librosa
13
 
14
  os.environ["COQUI_TOS_AGREED"] = "1"
15
  def process_video(video, high_quality, target_language):
16
- output_filename = "resized_video.mp4"
17
- if high_quality:
18
- ffmpeg.input(video).output(output_filename, vf='scale=-1:720').run()
19
- video_path = output_filename
20
- else:
21
- video_path = video
 
 
22
 
23
- # Debugging Step 1: Check if video_path exists
24
- if not os.path.exists(video_path):
25
- return f"Error: {video_path} does not exist."
26
 
27
- ffmpeg.input(video_path).output('output_audio.wav', acodec='pcm_s24le', ar=48000, map='a').run()
 
28
 
29
- y, sr = sf.read("output_audio.wav")
30
- y = y.astype(np.float32)
31
- y_denoised = wiener(y)
32
- sf.write("output_audio_denoised.wav", y_denoised, sr)
33
-
34
- sound = AudioSegment.from_file("output_audio_denoised.wav", format="wav")
35
- sound = sound.apply_gain(0) # Reduce gain by 5 dB
36
- sound = sound.low_pass_filter(3000).high_pass_filter(100)
37
- sound.export("output_audio_processed.wav", format="wav")
38
-
39
- shell_command = f"ffmpeg -y -i output_audio_processed.wav -af lowpass=3000,highpass=100 output_audio_final.wav".split(" ")
40
- subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
41
-
42
- model = whisper.load_model("base")
43
- result = model.transcribe("output_audio_final.wav")
44
- whisper_text = result["text"]
45
- whisper_language = result['language']
46
-
47
- language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
48
- target_language_code = language_mapping[target_language]
49
- translator = Translator()
50
- translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
51
-
52
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
53
- tts.to('cuda') # Replacing deprecated gpu=True
54
- tts.tts_to_file(translated_text, speaker_wav='output_audio_final.wav', file_path="output_synth.wav", language=target_language_code)
55
-
56
- pad_top = 0
57
- pad_bottom = 15
58
- pad_left = 0
59
- pad_right = 0
60
- rescaleFactor = 1
61
-
62
- # Debugging Step 2: Remove quotes around the video path
63
- video_path_fix = video_path
64
-
65
- cmd = f"python Wav2Lip/inference.py --checkpoint_path '/Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path_fix)} --audio 'output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile 'output_video.mp4'"
66
- subprocess.run(cmd, shell=True)
67
- # Debugging Step 3: Check if output video exists
68
- if not os.path.exists("output_video.mp4"):
69
- return "Error: output_video.mp4 was not generated."
70
-
71
- return "output_video.mp4"
72
 
73
  iface = gr.Interface(
74
  fn=process_video,
 
13
 
14
  os.environ["COQUI_TOS_AGREED"] = "1"
15
def process_video(video, high_quality, target_language):
    """Dub *video* into *target_language* and lip-sync the result.

    Pipeline: (1) optionally downscale to 720p, (2) extract and denoise
    the audio track, (3) transcribe with Whisper, (4) translate the
    transcript, (5) synthesize speech with XTTS using the original audio
    as the voice reference, (6) lip-sync with Wav2Lip.

    Parameters:
        video: path to the input video file.
        high_quality: if truthy, rescale the video to 720p first.
        target_language: display name of the target language; must be a
            key of ``language_mapping`` below (raises KeyError otherwise).

    Returns:
        "output_video.mp4" (written to the current working directory) on
        success, or an "Error: ..." message string on failure.
    """
    language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
    target_language_code = language_mapping[target_language]

    with tempfile.TemporaryDirectory() as temp_dir:
        if high_quality:
            video_path = os.path.join(temp_dir, "resized_video.mp4")
            ffmpeg.input(video).output(video_path, vf='scale=-1:720').run()
        else:
            video_path = video

        if not os.path.exists(video_path):
            return f"Error: {video_path} does not exist."

        # Extract the audio track into the scratch directory.
        audio_output = os.path.join(temp_dir, "output_audio.wav")
        ffmpeg.input(video_path).output(audio_output, acodec='pcm_s24le', ar=48000, map='a', y=True).run()

        # BUG FIX: read the file ffmpeg actually wrote (inside temp_dir),
        # not a bare "output_audio.wav" in the current working directory.
        y, sr = sf.read(audio_output)
        y = y.astype(np.float32)
        y_denoised = wiener(y)
        denoised_path = os.path.join(temp_dir, "output_audio_denoised.wav")
        sf.write(denoised_path, y_denoised, sr)

        # Band-pass 100 Hz - 3 kHz to tame rumble and hiss.  The previous
        # apply_gain(0) was a no-op and has been dropped.
        sound = AudioSegment.from_file(denoised_path, format="wav")
        sound = sound.low_pass_filter(3000).high_pass_filter(100)
        processed_path = os.path.join(temp_dir, "output_audio_processed.wav")
        sound.export(processed_path, format="wav")

        # Second, ffmpeg-side band-pass.  List argv with shell=False
        # avoids shell quoting issues; check=True surfaces failures.
        final_audio = os.path.join(temp_dir, "output_audio_final.wav")
        subprocess.run(
            ["ffmpeg", "-y", "-i", processed_path,
             "-af", "lowpass=3000,highpass=100", final_audio],
            capture_output=False, text=True, check=True,
        )

        # Transcribe; Whisper also reports the detected source language.
        model = whisper.load_model("base")
        result = model.transcribe(final_audio)
        whisper_text = result["text"]
        whisper_language = result['language']

        translator = Translator()
        translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text

        # Synthesize the translated speech, cloning the original voice.
        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v1")
        tts.to('cuda')  # Replacing deprecated gpu=True
        synth_path = os.path.join(temp_dir, "output_synth.wav")
        tts.tts_to_file(translated_text, speaker_wav=final_audio, file_path=synth_path, language=target_language_code)

        # Lip-sync.  The output must live OUTSIDE temp_dir because it is
        # returned (and served) after the scratch directory is deleted.
        # List argv replaces the previous shell=True string command.
        cmd = [
            "python", "Wav2Lip/inference.py",
            "--checkpoint_path", "/Wav2Lip/checkpoints/wav2lip_gan.pth",
            "--face", video_path,
            "--audio", synth_path,
            "--pads", "0", "15", "0", "0",   # top, bottom, left, right
            "--resize_factor", "1",
            "--nosmooth",
            "--outfile", "output_video.mp4",
        ]
        subprocess.run(cmd)

        # Wav2Lip is best-effort (no check=True); verify its output exists.
        if not os.path.exists("output_video.mp4"):
            return "Error: output_video.mp4 was not generated."

        return "output_video.mp4"
 
73
 
74
  iface = gr.Interface(
75
  fn=process_video,