razaraees10 commited on
Commit
a8c4113
1 Parent(s): a51bbe9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -134
app.py CHANGED
@@ -1,153 +1,125 @@
1
  import streamlit as st
2
  from TTS.api import TTS
3
  import torch
4
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True).to("cuda")
5
- # Dependencies
6
- %cd /content/
7
-
8
  import locale
9
- locale.getpreferredencoding = lambda: "UTF-8"
10
-
11
- !git clone https://github.com/justinjohn0306/Wav2Lip
12
- !cd Wav2Lip && pip install -r requirements_colab.txt
13
-
14
- %cd /content/Wav2Lip
15
-
16
- !wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "face_detection/detection/sfd/s3fd.pth"
17
- !wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth' -O 'checkpoints/wav2lip.pth'
18
- !wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth' -O 'checkpoints/wav2lip_gan.pth'
19
- !wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth' -O 'checkpoints/resnet50.pth'
20
- !wget 'https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth' -O 'checkpoints/mobilenet.pth'
21
-
22
  import subprocess
23
  import assemblyai as aai
24
  import requests
25
  import moviepy.editor as mp
26
 
27
- class translation:
28
- def __init__(self,video_path,original_language,target_language):
29
- self.video_path= video_path
30
- self.original_language = original_language
31
- self.target_language = target_language
32
-
33
 
 
 
 
 
 
 
34
 
35
- def org_language_parameters(self,original_language):
36
  if original_language == 'English':
37
- self.lan_code='en'
38
- elif original_language =='German':
39
- self.lan_code='de'
40
- elif original_language =='French':
41
- self.lan_code='fr'
42
- elif original_language =='Spanish':
43
- self.lan_code='es'
44
  else:
45
  self.lan_code = ''
46
 
47
- def target_language_parameters(self,target_language):
48
  if target_language == 'English':
49
- self.tran_code='en'
50
- elif target_language =='German':
51
- self.tran_code='de'
52
- elif target_language =='French':
53
- self.tran_code='fr'
54
- elif target_language =='Spanish':
55
- self.tran_code='es'
56
  else:
57
  self.tran_code = ''
58
 
59
- def extract_audio(self):
60
- video = mp.VideoFileClip(self.video_path)
61
- audio = video.audio
62
- audio_path = "output_audio.wav"
63
- audio.write_audiofile(audio_path)
64
- print("Audio extracted successfully!")
65
- return audio_path
66
- def transcribe_audio(self,audio_path):
67
- aai.settings.api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
68
- config = aai.TranscriptionConfig(language_code=self.lan_code)
69
- transcriber = aai.Transcriber(config=config)
70
- transcript = transcriber.transcribe(audio_path)
71
- transcript_text = transcript.text
72
- return transcript_text
73
- if transcript.status == aai.TranscriptStatus.error:
74
- print(transcript.error)
75
- return None
76
-
77
-
78
- def translate_text(self,transcript_text):
79
- base_url = "https://api.cognitive.microsofttranslator.com"
80
- endpoint = "/translate"
81
- headers = {
82
- "Ocp-Apim-Subscription-Key": "cd226bb1f3644276bea01d82dd861cbb",
83
- "Content-Type": "application/json",
84
- "Ocp-Apim-Subscription-Region": "southeastasia"
85
- }
86
- params = {
87
- "api-version": "3.0",
88
- "from": self.lan_code,
89
- "to": self.tran_code
90
- }
91
- body = [{"text": transcript_text}]
92
- response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
93
- response.raise_for_status()
94
- translation = response.json()[0]["translations"][0]["text"]
95
- return translation
96
-
97
-
98
-
99
-
100
- #generate audio
101
- def generate_audio(self,translated_text):
102
- tts.tts_to_file(translated_text,
103
- speaker_wav='output_audio.wav',
104
- file_path="output_synth.wav",
105
- language= self.tran_code
106
- )
107
- return "output_synth.wav"
108
-
109
- def translate_video(self):
110
- audio_path = self.extract_audio()
111
- self.org_language_parameters(self.original_language)
112
- self.target_language_parameters(self.target_language)
113
- transcript_text = self.transcribe_audio(audio_path)
114
- translated_text = self.translate_text(transcript_text)
115
- translated_audio_path = self.generate_audio(translated_text)
116
- #Generate video
117
- %cd /content/Wav2Lip
118
-
119
- #This is the detection box padding, if you see it doesnt sit quite right, just adjust the values a bit. Usually the bottom one is the biggest issue
120
- pad_top = 0
121
- pad_bottom = 15
122
- pad_left = 0
123
- pad_right = 0
124
- rescaleFactor = 1
125
- video_path_fix = f"'../{video_path}'"
126
- audio_path_fix = f"'../{translated_audio_path}'"
127
- !python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face $video_path_fix --audio $audio_path_fix --pads $pad_top $pad_bottom $pad_left $pad_right --resize_factor $rescaleFactor --nosmooth --outfile '/content/output_video.mp4'
128
-
129
- # Example usage:
130
- %cd /content/
131
- video_path = "video.mp4"
132
- target_language = "German"
133
- original_language = "English"
134
- translator = translation(video_path,original_language,target_language)
135
- translator.translate_video()
136
- if __name__ == "__main__":
137
- pass
138
-
139
-
140
-
141
-
142
-
143
-
144
-
145
-
146
-
147
-
148
-
149
-
150
-
151
-
152
-
153
-
 
1
  import streamlit as st
2
  from TTS.api import TTS
3
  import torch
 
 
 
 
4
  import locale
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import subprocess
6
  import assemblyai as aai
7
  import requests
8
  import moviepy.editor as mp
9
 
10
# Load the multilingual XTTS v2 voice-cloning model once at module import.
# NOTE: the original passed gpu=True AND .to("cuda"), which is redundant and
# raises on CPU-only hosts; pick the device from what is actually available.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
 
 
 
12
 
13
# Video translation pipeline.
class Translation:
    """Translate a video's speech into another language with a lip-synced result.

    Pipeline: extract audio (moviepy) -> transcribe (AssemblyAI) ->
    translate (Azure Translator) -> clone-voice TTS (XTTS) -> lip-sync (Wav2Lip).
    Intermediate artifacts are written to the working directory.
    """

    # Display name -> ISO 639-1 code, shared by source- and target-language lookups.
    _LANG_CODES = {'English': 'en', 'German': 'de', 'French': 'fr', 'Spanish': 'es'}

    def __init__(self, video_path, original_language, target_language):
        self.video_path = video_path                # filesystem path of the input video
        self.original_language = original_language  # spoken language (display name)
        self.target_language = target_language      # desired output language (display name)

    def org_language_parameters(self, original_language):
        """Set self.lan_code to the source-language code ('' if unsupported)."""
        self.lan_code = self._LANG_CODES.get(original_language, '')

    def target_language_parameters(self, target_language):
        """Set self.tran_code to the target-language code ('' if unsupported)."""
        self.tran_code = self._LANG_CODES.get(target_language, '')

    def extract_audio(self):
        """Extract the video's audio track to output_audio.wav and return its path."""
        video = mp.VideoFileClip(self.video_path)
        audio_path = "output_audio.wav"
        video.audio.write_audiofile(audio_path)
        st.success("Audio extracted successfully!")
        return audio_path

    def transcribe_audio(self, audio_path):
        """Transcribe audio_path via AssemblyAI; return the text or None on error."""
        # SECURITY: hard-coded API key committed to the repo — move to an
        # environment variable / Streamlit secrets and rotate the key.
        aai.settings.api_key = "c29eb650444a4ae4bea01d82dd861cbb"
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        # The original checked the error status only AFTER returning (dead code);
        # check it first so a failed transcription surfaces as None, not garbage.
        if transcript.status == aai.TranscriptStatus.error:
            st.error(transcript.error)
            return None
        return transcript.text

    def translate_text(self, transcript_text):
        """Translate transcript_text via Azure Translator; return the translated string."""
        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            # SECURITY: hard-coded subscription key — move to env/secrets and rotate.
            "Ocp-Apim-Subscription-Key": "cd226bb1f3644276bea01d82dd861cbb",
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia"
        }
        params = {
            "api-version": "3.0",
            "from": self.lan_code,
            "to": self.tran_code
        }
        body = [{"text": transcript_text}]
        response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def generate_audio(self, translated_text):
        """Synthesize translated_text in the speaker's cloned voice; return the wav path."""
        tts.tts_to_file(translated_text,
                        speaker_wav='output_audio.wav',   # voice sample from the source video
                        file_path="output_synth.wav",
                        language=self.tran_code)
        return "output_synth.wav"

    def translate_video(self):
        """Run the full pipeline and write the lip-synced result to output_video.mp4."""
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.target_language_parameters(self.target_language)
        transcript_text = self.transcribe_audio(audio_path)
        translated_text = self.translate_text(transcript_text)
        translated_audio_path = self.generate_audio(translated_text)

        # Wav2Lip detection-box padding; tweak if the mouth region sits wrong
        # (the bottom pad is usually the one that needs adjusting).
        pad_top = 0
        pad_bottom = 15
        pad_left = 0
        pad_right = 0
        rescale_factor = 1
        # BUG FIX: the original wrapped paths as f"'{path}'". With shell=False
        # list arguments the quotes are passed literally and become part of the
        # filename, so inference.py could never find the files. Pass raw paths.
        subprocess.run(['python', 'inference.py',
                        '--checkpoint_path', 'checkpoints/wav2lip_gan.pth',
                        '--face', self.video_path,
                        '--audio', translated_audio_path,
                        '--pads', str(pad_top), str(pad_bottom), str(pad_left), str(pad_right),
                        '--resize_factor', str(rescale_factor),
                        '--nosmooth',
                        '--outfile', 'output_video.mp4'],
                       check=True)  # fail loudly instead of silently producing nothing
+
107
# --- Streamlit UI ----------------------------------------------------------
st.title("Translate Your Video")
st.write("Upload your video and select the original and target languages.")

# Upload video
video_file = st.file_uploader("Upload Video", type=["mp4"])

if video_file is not None:
    # BUG FIX: st.file_uploader yields an in-memory buffer, but the pipeline
    # (moviepy / Wav2Lip) reads from a filesystem path. The original passed
    # video_file.name without ever writing the bytes to disk, so
    # VideoFileClip() would fail. Persist the upload first.
    with open(video_file.name, "wb") as f:
        f.write(video_file.getbuffer())

    # Get original and target languages
    original_language = st.selectbox("Select Original Language",
                                     ['English', 'German', 'French', 'Spanish'])
    target_language = st.selectbox("Select Target Language",
                                   ['English', 'German', 'French', 'Spanish'])

    # Renamed from `translation` to avoid confusion with translated text.
    translator = Translation(video_path=video_file.name,
                             original_language=original_language,
                             target_language=target_language)

    if st.button("Translate"):
        translator.translate_video()
        st.success("Video translation complete! You can download the translated video.")