razaraees10 commited on
Commit
2355e92
1 Parent(s): f9a3150

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +165 -0
app.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Environment setup (originally a Colab notebook cell) ---
# BUG FIX(review): the original used IPython magics (%cd) and shell escapes
# (!git, !pip, !wget), which are SyntaxErrors in a plain .py file; rewritten
# with os.chdir / subprocess.run (list form, shell=False — no injection risk).
import os
import subprocess
import locale

from TTS.api import TTS

# XTTS v2 multilingual voice-cloning model, moved entirely onto the GPU.
# (The original also passed gpu=True, which is redundant with .to("cuda").)
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda")

# Colab's preferred-encoding probe can report non-UTF-8; force UTF-8 so
# subprocess/pip output decodes cleanly.
locale.getpreferredencoding = lambda: "UTF-8"

os.chdir("/content/")

# Fetch Wav2Lip and install its dependencies.
subprocess.run(["git", "clone", "https://github.com/justinjohn0306/Wav2Lip"], check=True)
subprocess.run(["pip", "install", "-r", "requirements_colab.txt"], cwd="Wav2Lip", check=True)

os.chdir("/content/Wav2Lip")

# Pretrained weights: s3fd face detector, the two lip-sync checkpoints, and
# the face-detection backbones. (url, destination) pairs.
_CHECKPOINTS = [
    ("https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth",
     "face_detection/detection/sfd/s3fd.pth"),
    ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
     "checkpoints/wav2lip.pth"),
    ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth",
     "checkpoints/wav2lip_gan.pth"),
    ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth",
     "checkpoints/resnet50.pth"),
    ("https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth",
     "checkpoints/mobilenet.pth"),
]
for _url, _dest in _CHECKPOINTS:
    subprocess.run(["wget", _url, "-O", _dest], check=True)

import assemblyai as aai
import requests
import moviepy.editor as mp
24
class translation:
    """Dub a video into another language.

    Pipeline: extract the audio track, transcribe it (AssemblyAI),
    translate the transcript (Microsoft Translator), synthesize the
    translation with a voice cloned from the original audio (XTTS v2),
    then lip-sync the new audio onto the video (Wav2Lip).
    """

    # Supported UI language names -> ISO 639-1 codes.
    _LANG_CODES = {"English": "en", "German": "de", "French": "fr", "Spanish": "es"}

    def __init__(self, video_path, original_language, target_language):
        self.video_path = video_path
        self.original_language = original_language
        self.target_language = target_language

    def org_language_parameters(self, original_language):
        """Set self.lan_code for the source language ("" if unsupported)."""
        self.lan_code = self._LANG_CODES.get(original_language, "")

    def target_language_parameters(self, target_language):
        """Set self.tran_code for the target language ("" if unsupported)."""
        self.tran_code = self._LANG_CODES.get(target_language, "")

    def extract_audio(self):
        """Write the video's audio track to output_audio.wav and return its path."""
        video = mp.VideoFileClip(self.video_path)
        audio = video.audio
        audio_path = "output_audio.wav"
        audio.write_audiofile(audio_path)
        print("Audio extracted successfully!")
        return audio_path

    def transcribe_audio(self, audio_path):
        """Transcribe audio_path with AssemblyAI; return the text, or None on error."""
        # SECURITY(review): hardcoded API key — should come from an env var.
        aai.settings.api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        # BUG FIX: the error check was placed AFTER the return statement in the
        # original, making it unreachable; check status before returning.
        if transcript.status == aai.TranscriptStatus.error:
            print(transcript.error)
            return None
        return transcript.text

    def translate_text(self, transcript_text):
        """Translate transcript_text from self.lan_code to self.tran_code.

        Uses the Microsoft Translator Text API v3. Raises requests.HTTPError
        on a non-2xx response.
        """
        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        # SECURITY(review): hardcoded subscription key — should come from an env var.
        headers = {
            "Ocp-Apim-Subscription-Key": "cd226bb1f3644276bea01d82dd861cbb",
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia"
        }
        params = {
            "api-version": "3.0",
            "from": self.lan_code,
            "to": self.tran_code
        }
        body = [{"text": transcript_text}]
        response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def generate_audio(self, translated_text):
        """Synthesize translated_text with XTTS, cloning the voice from
        output_audio.wav; return the synthesized file's path."""
        tts.tts_to_file(translated_text,
                        speaker_wav='output_audio.wav',
                        file_path="output_synth.wav",
                        language=self.tran_code)
        return "output_synth.wav"

    def translate_video(self):
        """Run the full dubbing pipeline; return the output video path."""
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.target_language_parameters(self.target_language)
        transcript_text = self.transcribe_audio(audio_path)
        translated_text = self.translate_text(transcript_text)
        translated_audio_path = self.generate_audio(translated_text)
        # Detection-box padding: adjust if the mouth region doesn't sit right
        # (the bottom pad is usually the one that needs tweaking).
        pad_top, pad_bottom, pad_left, pad_right = 0, 15, 0, 0
        rescale_factor = 1
        # BUG FIX: the original used the IPython magic `%cd` and a `!python`
        # shell escape here (SyntaxErrors in a plain .py); run Wav2Lip's
        # inference via subprocess with cwd set instead.
        cmd = [
            "python", "inference.py",
            "--checkpoint_path", "checkpoints/wav2lip_gan.pth",
            "--face", f"../{self.video_path}",
            "--audio", f"../{translated_audio_path}",
            "--pads", str(pad_top), str(pad_bottom), str(pad_left), str(pad_right),
            "--resize_factor", str(rescale_factor),
            "--nosmooth",
            "--outfile", "/content/output_video.mp4",
        ]
        subprocess.run(cmd, cwd="/content/Wav2Lip", check=True)
        return '/content/output_video.mp4'  # Path to the translated video file
129
+
130
+ # from translator import translation # Import the Translator class from translate module
131
+ import gradio as gr
132
+ import os
133
+ from google.colab import files
134
+
135
+
136
def app(video_path, original_language, target_language):
    """Gradio callback: stage the uploaded video, run the dubbing pipeline,
    and return the path of the lip-synced translated video."""
    # BUG FIX: the original used the IPython magic `%cd /content/`, which is a
    # SyntaxError in a plain .py file; use os.chdir instead.
    os.chdir("/content/")

    video_name = os.path.basename(video_path)

    # Copy the uploaded temp file into /content so the pipeline's relative
    # "../<video>" paths resolve from the Wav2Lip directory.
    with open(video_path, "rb") as uploaded_file, open(video_name, "wb") as f:
        f.write(uploaded_file.read())

    translator = translation(video_name, original_language, target_language)
    return translator.translate_video()
152
+
153
# Gradio front-end: upload a video, choose the source and target languages,
# and receive the lip-synced translated video back.
language_choices = ["English", "German", "French", "Spanish"]

interface_video_file = gr.Interface(
    fn=app,
    inputs=[
        gr.Video(label="Video Path"),
        gr.Dropdown(language_choices, label="Original Language"),
        gr.Dropdown(language_choices, label="Targeted Language"),
    ],
    outputs=gr.Video(label="Translated Video"),
)

interface_video_file.launch(debug=True)
164
+
165
+