razaraees10 commited on
Commit
ece3d5d
1 Parent(s): 94e8cbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +153 -0
app.py CHANGED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import locale
import os
import subprocess

import assemblyai as aai
import moviepy.editor as mp
import requests
import streamlit as st
import torch
from TTS.api import TTS

# XTTS v2 multilingual voice-cloning model, loaded once at import.
# NOTE(review): gpu=True already places the model on GPU; the extra
# .to("cuda") is kept for parity with the original but is redundant.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True).to("cuda")

# Some Colab images default to ASCII; force UTF-8 so subprocess output decodes.
locale.getpreferredencoding = lambda: "UTF-8"


def _setup_wav2lip(base_dir="/content"):
    """Clone Wav2Lip, install its requirements, and fetch model checkpoints.

    The original notebook used IPython magics (%cd, !git, !wget), which are
    syntax errors in a plain .py file; this uses subprocess instead.
    Idempotent: the clone/install step is skipped if the repo already exists.
    """
    repo_dir = os.path.join(base_dir, "Wav2Lip")
    if not os.path.isdir(repo_dir):
        subprocess.run(
            ["git", "clone", "https://github.com/justinjohn0306/Wav2Lip", repo_dir],
            check=True,
        )
        subprocess.run(
            ["pip", "install", "-r", "requirements_colab.txt"],
            cwd=repo_dir,
            check=True,
        )
    # (url, destination relative to the repo) pairs for every checkpoint.
    checkpoints = [
        ("https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
         "face_detection/detection/sfd/s3fd.pth"),
        ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
         "checkpoints/wav2lip.pth"),
        ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth",
         "checkpoints/wav2lip_gan.pth"),
        ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth",
         "checkpoints/resnet50.pth"),
        ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth",
         "checkpoints/mobilenet.pth"),
    ]
    for url, dest in checkpoints:
        subprocess.run(["wget", url, "-O", dest], cwd=repo_dir, check=True)


# Preserve the original module-level behavior: set up Wav2Lip at import time.
_setup_wav2lip()
class translation:
    """Dub a video's speech into another language and lip-sync the result.

    Pipeline: extract audio -> transcribe (AssemblyAI) -> translate
    (Microsoft Translator) -> voice-cloned synthesis (XTTS) -> lip-sync
    (Wav2Lip inference via subprocess).

    Parameters
    ----------
    video_path : str
        Path to the source video, relative to the working directory.
    original_language, target_language : str
        Human-readable language names; see _LANG_CODES for supported values.
    """

    # Supported language name -> ISO 639-1 code. Shared by both the
    # source-language and target-language setters (the original duplicated
    # the same if/elif ladder twice).
    _LANG_CODES = {
        "English": "en",
        "German": "de",
        "French": "fr",
        "Spanish": "es",
    }

    def __init__(self, video_path, original_language, target_language):
        self.video_path = video_path
        self.original_language = original_language
        self.target_language = target_language

    def org_language_parameters(self, original_language):
        """Set self.lan_code from the source-language name ('' if unsupported)."""
        self.lan_code = self._LANG_CODES.get(original_language, "")

    def target_language_parameters(self, target_language):
        """Set self.tran_code from the target-language name ('' if unsupported)."""
        self.tran_code = self._LANG_CODES.get(target_language, "")

    def extract_audio(self):
        """Extract the video's audio track to output_audio.wav and return its path."""
        video = mp.VideoFileClip(self.video_path)
        audio_path = "output_audio.wav"
        video.audio.write_audiofile(audio_path)
        print("Audio extracted successfully!")
        return audio_path

    def transcribe_audio(self, audio_path):
        """Transcribe audio with AssemblyAI.

        Returns the transcript text, or None if transcription failed.
        Bug fix: the original returned before the error-status check,
        making the error branch unreachable.
        """
        import os

        # NOTE(review): hardcoded API key kept only as a backward-compatible
        # fallback — rotate it and supply ASSEMBLYAI_API_KEY via environment.
        aai.settings.api_key = os.environ.get(
            "ASSEMBLYAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2"
        )
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        if transcript.status == aai.TranscriptStatus.error:
            print(transcript.error)
            return None
        return transcript.text

    def translate_text(self, transcript_text):
        """Translate text via the Microsoft Translator v3 API.

        Raises requests.HTTPError on a failed API call.
        """
        import os

        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            # NOTE(review): hardcoded subscription key kept only as a
            # backward-compatible fallback — rotate it and supply
            # MS_TRANSLATOR_KEY via environment.
            "Ocp-Apim-Subscription-Key": os.environ.get(
                "MS_TRANSLATOR_KEY", "cd226bb1f3644276bea01d82dd861cbb"
            ),
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia",
        }
        params = {
            "api-version": "3.0",
            "from": self.lan_code,
            "to": self.tran_code,
        }
        body = [{"text": transcript_text}]
        response = requests.post(
            base_url + endpoint, headers=headers, params=params, json=body
        )
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def generate_audio(self, translated_text):
        """Synthesize the translated text in the original speaker's voice."""
        out_path = "output_synth.wav"
        tts.tts_to_file(
            translated_text,
            speaker_wav="output_audio.wav",  # voice cloned from extracted audio
            file_path=out_path,
            language=self.tran_code,
        )
        return out_path

    def translate_video(self):
        """Run the full dubbing pipeline; writes /content/output_video.mp4."""
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.target_language_parameters(self.target_language)
        transcript_text = self.transcribe_audio(audio_path)
        translated_text = self.translate_text(transcript_text)
        translated_audio_path = self.generate_audio(translated_text)
        # Face-detection box padding; if the mouth doesn't sit quite right,
        # adjust these — the bottom pad is usually the biggest issue.
        pad_top, pad_bottom, pad_left, pad_right = 0, 15, 0, 0
        rescale_factor = 1
        # Bug fixes vs. the original: use self.video_path (the original read
        # a module-level global), and run Wav2Lip via subprocess instead of
        # the %cd / !python notebook magics, which are invalid in a .py file.
        subprocess.run(
            [
                "python", "inference.py",
                "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
                "--face", f"../{self.video_path}",
                "--audio", f"../{translated_audio_path}",
                "--pads", str(pad_top), str(pad_bottom),
                str(pad_left), str(pad_right),
                "--resize_factor", str(rescale_factor),
                "--nosmooth",
                "--outfile", "/content/output_video.mp4",
            ],
            cwd="/content/Wav2Lip",
            check=True,
        )
def main():
    """Example usage: dub an English video.mp4 into German."""
    import os

    # Replaces the notebook magic `%cd /content/`.
    os.chdir("/content")
    video_path = "video.mp4"
    target_language = "German"
    original_language = "English"
    translator = translation(video_path, original_language, target_language)
    translator.translate_video()


if __name__ == "__main__":
    # Bug fix: the original ran the pipeline unconditionally at import time
    # and left this guard empty (`pass`). The work now runs only when the
    # file is executed as a script — which is how Streamlit runs app.py.
    main()
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+