Salman11223 commited on
Commit
34f4ff9
1 Parent(s): ab60412

Update translate.py

Browse files
Files changed (1) hide show
  1. translate.py +264 -110
translate.py CHANGED
@@ -6,23 +6,15 @@ import azure.cognitiveservices.speech as speechsdk
6
  from moviepy.editor import AudioFileClip
7
  from gradio_client import Client
8
 
 
9
  class Translate:
10
- def __init__(self, video_path, target_language,original_language,speaking_rate):
11
  self.video_path = video_path
12
  self.target_language = target_language
13
  self.original_language=original_language
14
  self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
15
- self.txtospech_key = "358c77527e48454cbf5bf2bd54f03161"
16
  self.translation_api_key = "394833878dd54214886cd81a35ac35dc"
17
  self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f"
18
- self.speaking_rate= speaking_rate
19
- self.print_parameters()
20
-
21
- def print_parameters(self):
22
- print("Video_Path" , self.video_path)
23
- print("original_language" , self.original_language)
24
- print("target_language" , self.target_language)
25
- print("speaking_rate" , self.speaking_rate)
26
 
27
  def extract_audio(self):
28
  aai.settings.api_key = self.aai_api_key
@@ -33,17 +25,6 @@ class Translate:
33
  print("Audio extracted successfully!")
34
  return audio_path
35
 
36
- def gender_detection(self):
37
- # gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/"
38
- # gender_client = Client(gender_model_url)
39
- # gender = gender_client.predict(
40
- # 'audio.wav', api_name="/predict"
41
- # )
42
- # print(gender)
43
- # return gender
44
- return "male"
45
-
46
-
47
 
48
  def org_language_parameters(self,original_language):
49
  if original_language == 'English':
@@ -57,27 +38,23 @@ class Translate:
57
  else:
58
  self.lan_code = ''
59
 
60
- def set_language_parameters(self, target_language, detected_gender):
 
61
  if target_language == 'English':
62
  self.language_code = 'en-US'
63
  self.trans_code = 'en'
64
- self.voice_names = 'en-US-GuyNeural' if detected_gender == 'male' else 'en-US-AriaNeural'
65
  elif target_language == 'German':
66
  self.language_code = 'de-DE'
67
  self.trans_code = 'de'
68
- self.voice_names = 'de-DE-ConradNeural' if detected_gender == 'male' else 'de-DE-KatjaNeural'
69
  elif target_language == 'French':
70
  self.language_code = 'fr-CA'
71
  self.trans_code = 'fr'
72
- self.voice_names = 'fr-CA-JeanNeural' if detected_gender == 'male' else 'fr-CA-SylvieNeural'
73
  elif target_language == 'Spanish':
74
  self.language_code = 'es-ES'
75
  self.trans_code = 'es'
76
- self.voice_names = 'es-ES-AlvaroNeural' if detected_gender == 'male' else 'es-ES-ElviraNeural'
77
  elif target_language == 'Urdu':
78
  self.language_code = 'ur-PK'
79
  self.trans_code = 'ur'
80
- self.voice_names = 'ur-PK-AsadNeural' if detected_gender == 'male' else 'ur-PK-UzmaNeural'
81
  else:
82
  # Handle unsupported languages or set default values
83
  self.voice_names = []
@@ -96,10 +73,7 @@ class Translate:
96
 
97
 
98
 
99
- def get_audio_duration(self, audio_path):
100
- audio_clip = AudioFileClip(audio_path)
101
- audio_duration = audio_clip.duration
102
- return audio_duration
103
 
104
  def transcribe_audio(self, audio_path):
105
  aai.settings.api_key = self.aai_api_key
@@ -114,15 +88,7 @@ class Translate:
114
  file.write(transcript.text)
115
 
116
 
117
- def generate_ssml(self, text, speaking_rate):
118
- # Construct SSML with the given text, speaking rate, voice name, and language code
119
- return f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{self.language_code}"><voice name="{self.voice_names}"><prosody rate="{speaking_rate}">{text}</prosody></voice></speak>'
120
- def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate):
121
- ssml = self.generate_ssml(text, speaking_rate)
122
- speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion)
123
- audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
124
- speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
125
- speech_synthesizer.speak_ssml_async(ssml).get()
126
 
127
  def translate_text(self, text):
128
  base_url = "https://api.cognitive.microsofttranslator.com"
@@ -144,83 +110,271 @@ class Translate:
144
  translation = response.json()[0]["translations"][0]["text"]
145
  return translation
146
 
 
 
 
147
  def transcribe_and_translate(self):
148
  audio_path = self.extract_audio()
149
  self.org_language_parameters(self.original_language)
150
  self.transcribe_audio(audio_path)
151
- gender = self.gender_detection()
152
- print("Detected Gender:", gender)
153
- self.set_language_parameters(self.target_language,gender)
154
- with open("transcript.srt", 'r') as srt_file:
155
- original_srt_content = srt_file.read()
156
-
157
- original_subtitles = original_srt_content.strip().split('\n\n')
158
-
159
- translated_subtitles = []
160
- for subtitle in original_subtitles:
161
- lines = subtitle.split('\n')
162
- sequence_number = lines[0]
163
- timestamp = lines[1]
164
- original_text = '\n'.join(lines[2:])
165
- translated_text = self.translate_text(original_text)
166
- translated_subtitle = f"{sequence_number}\n{timestamp}\n{translated_text}"
167
- translated_subtitles.append(translated_subtitle)
168
-
169
- translated_srt_content = '\n\n'.join(translated_subtitles)
170
- translated_srt_path = "translated_file.srt"
171
-
172
- with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
173
- srt_file.write(translated_srt_content)
174
-
175
- # Loop through each translated subtitle and generate speech
176
- translated_audio_paths = []
177
- for subtitle in translated_subtitles:
178
- lines = subtitle.split('\n')
179
- sequence_number = lines[0]
180
- timestamp = lines[1]
181
- translated_text = '\n'.join(lines[2:])
182
- translated_audio_path = f"translated_audio_{sequence_number}.wav"
183
- self.text_to_speech(translated_text, self.txtospech_key, "southeastasia", translated_audio_path, self.speaking_rate)
184
- translated_audio_paths.append(translated_audio_path)
185
-
186
- # Create a list to store the audio clips
187
- translated_audio_clips = []
188
-
189
- # Loop through each translated audio path and create an AudioFileClip
190
- for audio_path in translated_audio_paths:
191
- translated_audio_clip = mp.AudioFileClip(audio_path)
192
- translated_audio_clips.append(translated_audio_clip)
193
-
194
- # Concatenate the translated audio clips into a single audio file
195
- translated_audio = mp.concatenate_audioclips(translated_audio_clips)
196
-
197
- # Define the output audio file path
198
- output_audio_path = "translated_audio.wav"
199
-
200
- # Write the concatenated translated audio to the output file
201
- translated_audio.write_audiofile(output_audio_path)
202
-
203
- # Load the original video
204
- video = mp.VideoFileClip(self.video_path)
205
 
206
- # Load the translated audio
207
- translated_audio = mp.AudioFileClip(output_audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
- # Set the audio of the video to the translated audio
210
- video = video.set_audio(translated_audio)
211
 
212
- # Define the output video file path
213
- output_video_path = "translated_video.mp4"
214
 
215
- # Write the video with translated audio to the output file
216
- video.write_videofile(output_video_path, codec="libx264", audio_codec="aac")
 
 
 
 
 
 
 
 
 
217
 
218
- # Clean up temporary files
219
- self.cleanup_temp_files()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- def cleanup_temp_files(self):
222
- temp_files = ["audio.wav", "t.txt", "transcript.srt","translated_audio.wav","translated_file.srt"] + [f"translated_audio_{i}.wav" for i in range(1, 100)] # Adjust the range accordingly
223
- for file in temp_files:
224
- if os.path.exists(file):
225
- os.remove(file)
226
- print(f"Deleted {file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  from moviepy.editor import AudioFileClip
7
  from gradio_client import Client
8
 
9
+
10
  class Translate:
11
+ def __init__(self, video_path, target_language,original_language):
12
  self.video_path = video_path
13
  self.target_language = target_language
14
  self.original_language=original_language
15
  self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
 
16
  self.translation_api_key = "394833878dd54214886cd81a35ac35dc"
17
  self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f"
 
 
 
 
 
 
 
 
18
 
19
  def extract_audio(self):
20
  aai.settings.api_key = self.aai_api_key
 
25
  print("Audio extracted successfully!")
26
  return audio_path
27
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def org_language_parameters(self,original_language):
30
  if original_language == 'English':
 
38
  else:
39
  self.lan_code = ''
40
 
41
+
42
+ def set_language_parameters(self, target_language):
43
  if target_language == 'English':
44
  self.language_code = 'en-US'
45
  self.trans_code = 'en'
 
46
  elif target_language == 'German':
47
  self.language_code = 'de-DE'
48
  self.trans_code = 'de'
 
49
  elif target_language == 'French':
50
  self.language_code = 'fr-CA'
51
  self.trans_code = 'fr'
 
52
  elif target_language == 'Spanish':
53
  self.language_code = 'es-ES'
54
  self.trans_code = 'es'
 
55
  elif target_language == 'Urdu':
56
  self.language_code = 'ur-PK'
57
  self.trans_code = 'ur'
 
58
  else:
59
  # Handle unsupported languages or set default values
60
  self.voice_names = []
 
73
 
74
 
75
 
76
+
 
 
 
77
 
78
  def transcribe_audio(self, audio_path):
79
  aai.settings.api_key = self.aai_api_key
 
88
  file.write(transcript.text)
89
 
90
 
91
+
 
 
 
 
 
 
 
 
92
 
93
  def translate_text(self, text):
94
  base_url = "https://api.cognitive.microsofttranslator.com"
 
110
  translation = response.json()[0]["translations"][0]["text"]
111
  return translation
112
 
113
+
114
+
115
+
116
  def transcribe_and_translate(self):
117
  audio_path = self.extract_audio()
118
  self.org_language_parameters(self.original_language)
119
  self.transcribe_audio(audio_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
+ self.set_language_parameters(self.target_language)
122
+ # Assuming t.txt contains the original text
123
+ with open("t.txt", 'r', encoding='utf-8') as text_file:
124
+ original_text = text_file.read()
125
+ self.org_language_parameters(self.original_language)
126
+ # Translate the entire original text
127
+ translated_text = self.translate_text(original_text)
128
+
129
+ # Write the translated text to a new text file
130
+ translated_text_path = "translated_text.txt"
131
+ with open(translated_text_path, 'w', encoding='utf-8') as translated_file:
132
+ translated_file.write(translated_text)
133
+
134
+ print("Translation complete. Translated text saved to:", translated_text_path)
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+ # class Translate:
164
+ # def __init__(self, video_path, target_language,original_language,speaking_rate):
165
+ # self.video_path = video_path
166
+ # self.target_language = target_language
167
+ # self.original_language=original_language
168
+ # self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
169
+ # self.txtospech_key = "358c77527e48454cbf5bf2bd54f03161"
170
+ # self.translation_api_key = "394833878dd54214886cd81a35ac35dc"
171
+ # self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f"
172
+ # self.speaking_rate= speaking_rate
173
+ # self.print_parameters()
174
+
175
+ # def print_parameters(self):
176
+ # print("Video_Path" , self.video_path)
177
+ # print("original_language" , self.original_language)
178
+ # print("target_language" , self.target_language)
179
+ # print("speaking_rate" , self.speaking_rate)
180
+
181
+ # def extract_audio(self):
182
+ # aai.settings.api_key = self.aai_api_key
183
+ # video = mp.VideoFileClip(self.video_path)
184
+ # audio = video.audio
185
+ # audio_path = "audio.wav"
186
+ # audio.write_audiofile(audio_path)
187
+ # print("Audio extracted successfully!")
188
+ # return audio_path
189
+
190
+ # def gender_detection(self):
191
+ # # gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/"
192
+ # # gender_client = Client(gender_model_url)
193
+ # # gender = gender_client.predict(
194
+ # # 'audio.wav', api_name="/predict"
195
+ # # )
196
+ # # print(gender)
197
+ # # return gender
198
+ # return "male"
199
 
 
 
200
 
 
 
201
 
202
+ # def org_language_parameters(self,original_language):
203
+ # if original_language == 'English':
204
+ # self.lan_code='en'
205
+ # elif original_language =='German':
206
+ # self.lan_code='de'
207
+ # elif original_language =='French':
208
+ # self.lan_code='fr'
209
+ # elif original_language =='Spanish':
210
+ # self.lan_code='es'
211
+ # else:
212
+ # self.lan_code = ''
213
 
214
+ # def set_language_parameters(self, target_language, detected_gender):
215
+ # if target_language == 'English':
216
+ # self.language_code = 'en-US'
217
+ # self.trans_code = 'en'
218
+ # self.voice_names = 'en-US-GuyNeural' if detected_gender == 'male' else 'en-US-AriaNeural'
219
+ # elif target_language == 'German':
220
+ # self.language_code = 'de-DE'
221
+ # self.trans_code = 'de'
222
+ # self.voice_names = 'de-DE-ConradNeural' if detected_gender == 'male' else 'de-DE-KatjaNeural'
223
+ # elif target_language == 'French':
224
+ # self.language_code = 'fr-CA'
225
+ # self.trans_code = 'fr'
226
+ # self.voice_names = 'fr-CA-JeanNeural' if detected_gender == 'male' else 'fr-CA-SylvieNeural'
227
+ # elif target_language == 'Spanish':
228
+ # self.language_code = 'es-ES'
229
+ # self.trans_code = 'es'
230
+ # self.voice_names = 'es-ES-AlvaroNeural' if detected_gender == 'male' else 'es-ES-ElviraNeural'
231
+ # elif target_language == 'Urdu':
232
+ # self.language_code = 'ur-PK'
233
+ # self.trans_code = 'ur'
234
+ # self.voice_names = 'ur-PK-AsadNeural' if detected_gender == 'male' else 'ur-PK-UzmaNeural'
235
+ # else:
236
+ # # Handle unsupported languages or set default values
237
+ # self.voice_names = []
238
+ # self.language_code = ''
239
+ # self.trans_code = ''
240
 
241
+
242
+ # print("Target Language:", target_language)
243
+ # print("Trans Code:", self.trans_code)
244
+
245
+ # def get_voice_names(self):
246
+ # return self.voice_names
247
+
248
+ # def get_language_code(self):
249
+ # return self.language_code
250
+
251
+
252
+
253
+ # def get_audio_duration(self, audio_path):
254
+ # audio_clip = AudioFileClip(audio_path)
255
+ # audio_duration = audio_clip.duration
256
+ # return audio_duration
257
+
258
+ # def transcribe_audio(self, audio_path):
259
+ # aai.settings.api_key = self.aai_api_key
260
+ # config = aai.TranscriptionConfig(self.lan_code)
261
+ # transcriber = aai.Transcriber(config=config)
262
+ # transcript = transcriber.transcribe(audio_path)
263
+ # file_path = "transcript.srt"
264
+ # filepath = "t.txt"
265
+ # with open(file_path, "w") as file:
266
+ # file.write(transcript.export_subtitles_srt())
267
+ # with open(filepath, "w") as file:
268
+ # file.write(transcript.text)
269
+
270
+
271
+ # def generate_ssml(self, text, speaking_rate):
272
+ # # Construct SSML with the given text, speaking rate, voice name, and language code
273
+ # return f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{self.language_code}"><voice name="{self.voice_names}"><prosody rate="{speaking_rate}">{text}</prosody></voice></speak>'
274
+ # def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate):
275
+ # ssml = self.generate_ssml(text, speaking_rate)
276
+ # speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion)
277
+ # audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
278
+ # speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
279
+ # speech_synthesizer.speak_ssml_async(ssml).get()
280
+
281
+ # def translate_text(self, text):
282
+ # base_url = "https://api.cognitive.microsofttranslator.com"
283
+ # endpoint = "/translate"
284
+ # headers = {
285
+ # "Ocp-Apim-Subscription-Key": self.translation_api_key,
286
+ # "Content-Type": "application/json",
287
+ # "Ocp-Apim-Subscription-Region": "southeastasia"
288
+ # }
289
+
290
+ # params = {
291
+ # "api-version": "3.0",
292
+ # "to": self.trans_code
293
+ # }
294
+ # body = [{"text": text}]
295
+
296
+ # response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
297
+ # response.raise_for_status()
298
+ # translation = response.json()[0]["translations"][0]["text"]
299
+ # return translation
300
+
301
+ # def transcribe_and_translate(self):
302
+ # audio_path = self.extract_audio()
303
+ # self.org_language_parameters(self.original_language)
304
+ # self.transcribe_audio(audio_path)
305
+ # gender = self.gender_detection()
306
+ # print("Detected Gender:", gender)
307
+ # self.set_language_parameters(self.target_language,gender)
308
+ # with open("transcript.srt", 'r') as srt_file:
309
+ # original_srt_content = srt_file.read()
310
+
311
+ # original_subtitles = original_srt_content.strip().split('\n\n')
312
+
313
+ # translated_subtitles = []
314
+ # for subtitle in original_subtitles:
315
+ # lines = subtitle.split('\n')
316
+ # sequence_number = lines[0]
317
+ # timestamp = lines[1]
318
+ # original_text = '\n'.join(lines[2:])
319
+ # translated_text = self.translate_text(original_text)
320
+ # translated_subtitle = f"{sequence_number}\n{timestamp}\n{translated_text}"
321
+ # translated_subtitles.append(translated_subtitle)
322
+
323
+ # translated_srt_content = '\n\n'.join(translated_subtitles)
324
+ # translated_srt_path = "translated_file.srt"
325
+
326
+ # with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
327
+ # srt_file.write(translated_srt_content)
328
+
329
+ # # Loop through each translated subtitle and generate speech
330
+ # translated_audio_paths = []
331
+ # for subtitle in translated_subtitles:
332
+ # lines = subtitle.split('\n')
333
+ # sequence_number = lines[0]
334
+ # timestamp = lines[1]
335
+ # translated_text = '\n'.join(lines[2:])
336
+ # translated_audio_path = f"translated_audio_{sequence_number}.wav"
337
+ # self.text_to_speech(translated_text, self.txtospech_key, "southeastasia", translated_audio_path, self.speaking_rate)
338
+ # translated_audio_paths.append(translated_audio_path)
339
+
340
+ # # Create a list to store the audio clips
341
+ # translated_audio_clips = []
342
+
343
+ # # Loop through each translated audio path and create an AudioFileClip
344
+ # for audio_path in translated_audio_paths:
345
+ # translated_audio_clip = mp.AudioFileClip(audio_path)
346
+ # translated_audio_clips.append(translated_audio_clip)
347
+
348
+ # # Concatenate the translated audio clips into a single audio file
349
+ # translated_audio = mp.concatenate_audioclips(translated_audio_clips)
350
+
351
+ # # Define the output audio file path
352
+ # output_audio_path = "translated_audio.wav"
353
+
354
+ # # Write the concatenated translated audio to the output file
355
+ # translated_audio.write_audiofile(output_audio_path)
356
+
357
+ # # Load the original video
358
+ # video = mp.VideoFileClip(self.video_path)
359
+
360
+ # # Load the translated audio
361
+ # translated_audio = mp.AudioFileClip(output_audio_path)
362
+
363
+ # # Set the audio of the video to the translated audio
364
+ # video = video.set_audio(translated_audio)
365
+
366
+ # # Define the output video file path
367
+ # output_video_path = "translated_video.mp4"
368
+
369
+ # # Write the video with translated audio to the output file
370
+ # video.write_videofile(output_video_path, codec="libx264", audio_codec="aac")
371
+
372
+ # # Clean up temporary files
373
+ # self.cleanup_temp_files()
374
+
375
+ # def cleanup_temp_files(self):
376
+ # temp_files = ["audio.wav", "t.txt", "transcript.srt","translated_audio.wav","translated_file.srt"] + [f"translated_audio_{i}.wav" for i in range(1, 100)] # Adjust the range accordingly
377
+ # for file in temp_files:
378
+ # if os.path.exists(file):
379
+ # os.remove(file)
380
+ # print(f"Deleted {file}")