import os
import moviepy.editor as mp
import assemblyai as aai
import requests
import azure.cognitiveservices.speech as speechsdk
from moviepy.editor import AudioFileClip
from gradio_client import Client


class Translate:
    def __init__(self, video_path, original_language, target_language):
        self.video_path = video_path
        self.target_language = target_language
        self.original_language = original_language
        # Service credentials (AssemblyAI transcription, Azure Translator, Azure Speech)
        self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
        self.translation_api_key = "394833878dd54214886cd81a35ac35dc"
        self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f"

    def extract_audio(self):
        # Extract the audio track from the input video and save it as a WAV file.
        aai.settings.api_key = self.aai_api_key
        video = mp.VideoFileClip(self.video_path)
        audio = video.audio
        audio_path = "audio.wav"
        audio.write_audiofile(audio_path)
        print("Audio extracted successfully!")
        return audio_path

    def org_language_parameters(self, original_language):
        # Map the source language name to the transcription language code.
        if original_language == 'English':
            self.lan_code = 'en'
        elif original_language == 'German':
            self.lan_code = 'de'
        elif original_language == 'French':
            self.lan_code = 'fr'
        elif original_language == 'Spanish':
            self.lan_code = 'es'
        else:
            self.lan_code = ''

    def set_language_parameters(self, target_language):
        # Map the target language name to speech and translation language codes.
        if target_language == 'English':
            self.language_code = 'en-US'
            self.trans_code = 'en'
        elif target_language == 'German':
            self.language_code = 'de-DE'
            self.trans_code = 'de'
        elif target_language == 'French':
            self.language_code = 'fr-CA'
            self.trans_code = 'fr'
        elif target_language == 'Spanish':
            self.language_code = 'es-ES'
            self.trans_code = 'es'
        elif target_language == 'Urdu':
            self.language_code = 'ur-PK'
            self.trans_code = 'ur'
        else:
            # Handle unsupported languages or set default values
            self.voice_names = []
            self.language_code = ''
            self.trans_code = ''
        print("Target Language:", target_language)
        print("Trans Code:", self.trans_code)

    def get_voice_names(self):
        return self.voice_names

    def get_language_code(self):
        return self.language_code

    def transcribe_audio(self, audio_path):
        # Transcribe the extracted audio with AssemblyAI and write the result
        # both as SRT subtitles and as plain text.
        aai.settings.api_key = self.aai_api_key
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        file_path = "transcript.srt"
        filepath = "t.txt"
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(transcript.export_subtitles_srt())
        with open(filepath, "w", encoding="utf-8") as file:
            file.write(transcript.text)

    def translate_text(self, text):
        # Translate text with the Azure Translator REST API.
        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            "Ocp-Apim-Subscription-Key": self.translation_api_key,
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia"
        }
        params = {
            "api-version": "3.0",
            "to": self.trans_code
        }
        body = [{"text": text}]
        response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
        response.raise_for_status()
        translation = response.json()[0]["translations"][0]["text"]
        return translation

    def transcribe_and_translate(self):
        # Full pipeline: extract audio, transcribe it, then translate the transcript.
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.transcribe_audio(audio_path)
        self.set_language_parameters(self.target_language)
        # t.txt contains the plain-text transcript written by transcribe_audio
        with open("t.txt", 'r', encoding='utf-8') as text_file:
            original_text = text_file.read()
        # Translate the entire original text
        translated_text = self.translate_text(original_text)
        # Write the translated text to a new text file
        translated_text_path = "translated_text.txt"
        with open(translated_text_path, 'w', encoding='utf-8') as translated_file:
            translated_file.write(translated_text)
print("Translation complete. Translated text saved to:", translated_text_path) return translated_text_path # class Translate: # def __init__(self, video_path, target_language,original_language,speaking_rate): # self.video_path = video_path # self.target_language = target_language # self.original_language=original_language # self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2" # self.txtospech_key = "358c77527e48454cbf5bf2bd54f03161" # self.translation_api_key = "394833878dd54214886cd81a35ac35dc" # self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f" # self.speaking_rate= speaking_rate # self.print_parameters() # def print_parameters(self): # print("Video_Path" , self.video_path) # print("original_language" , self.original_language) # print("target_language" , self.target_language) # print("speaking_rate" , self.speaking_rate) # def extract_audio(self): # aai.settings.api_key = self.aai_api_key # video = mp.VideoFileClip(self.video_path) # audio = video.audio # audio_path = "audio.wav" # audio.write_audiofile(audio_path) # print("Audio extracted successfully!") # return audio_path # def gender_detection(self): # # gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/" # # gender_client = Client(gender_model_url) # # gender = gender_client.predict( # # 'audio.wav', api_name="/predict" # # ) # # print(gender) # # return gender # return "male" # def org_language_parameters(self,original_language): # if original_language == 'English': # self.lan_code='en' # elif original_language =='German': # self.lan_code='de' # elif original_language =='French': # self.lan_code='fr' # elif original_language =='Spanish': # self.lan_code='es' # else: # self.lan_code = '' # def set_language_parameters(self, target_language, detected_gender): # if target_language == 'English': # self.language_code = 'en-US' # self.trans_code = 'en' # self.voice_names = 'en-US-GuyNeural' if detected_gender == 'male' else 'en-US-AriaNeural' # elif target_language == 'German': # self.language_code = 'de-DE' # self.trans_code = 'de' # self.voice_names = 'de-DE-ConradNeural' if detected_gender == 'male' else 'de-DE-KatjaNeural' # elif target_language == 'French': # self.language_code = 'fr-CA' # self.trans_code = 'fr' # self.voice_names = 'fr-CA-JeanNeural' if detected_gender == 'male' else 'fr-CA-SylvieNeural' # elif target_language == 'Spanish': # self.language_code = 'es-ES' # self.trans_code = 'es' # self.voice_names = 'es-ES-AlvaroNeural' if detected_gender == 'male' else 'es-ES-ElviraNeural' # elif target_language == 'Urdu': # self.language_code = 'ur-PK' # self.trans_code = 'ur' # self.voice_names = 'ur-PK-AsadNeural' if detected_gender == 'male' else 'ur-PK-UzmaNeural' # else: # # Handle unsupported languages or set default values # self.voice_names = [] # self.language_code = '' # self.trans_code = '' # print("Target Language:", target_language) # print("Trans Code:", self.trans_code) # def get_voice_names(self): # return self.voice_names # def get_language_code(self): # return self.language_code # def get_audio_duration(self, audio_path): # audio_clip = AudioFileClip(audio_path) # audio_duration = audio_clip.duration # return audio_duration # def transcribe_audio(self, audio_path): # aai.settings.api_key = self.aai_api_key # config = aai.TranscriptionConfig(self.lan_code) # transcriber = aai.Transcriber(config=config) # transcript = transcriber.transcribe(audio_path) # file_path = "transcript.srt" # filepath = "t.txt" # with open(file_path, "w") as file: # 
#             file.write(transcript.export_subtitles_srt())
#         with open(filepath, "w") as file:
#             file.write(transcript.text)

#     def generate_ssml(self, text, speaking_rate):
#         # Construct SSML with the given text, speaking rate, voice name, and language code
#         return (
#             f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
#             f'xml:lang="{self.language_code}">'
#             f'<voice name="{self.voice_names}">'
#             f'<prosody rate="{speaking_rate}">{text}</prosody>'
#             f'</voice></speak>'
#         )

#     def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate):
#         ssml = self.generate_ssml(text, speaking_rate)
#         speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion)
#         audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
#         speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
#         speech_synthesizer.speak_ssml_async(ssml).get()

#     def translate_text(self, text):
#         base_url = "https://api.cognitive.microsofttranslator.com"
#         endpoint = "/translate"
#         headers = {
#             "Ocp-Apim-Subscription-Key": self.translation_api_key,
#             "Content-Type": "application/json",
#             "Ocp-Apim-Subscription-Region": "southeastasia"
#         }
#         params = {
#             "api-version": "3.0",
#             "to": self.trans_code
#         }
#         body = [{"text": text}]
#         response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
#         response.raise_for_status()
#         translation = response.json()[0]["translations"][0]["text"]
#         return translation

#     def transcribe_and_translate(self):
#         audio_path = self.extract_audio()
#         self.org_language_parameters(self.original_language)
#         self.transcribe_audio(audio_path)
#         gender = self.gender_detection()
#         print("Detected Gender:", gender)
#         self.set_language_parameters(self.target_language, gender)
#         with open("transcript.srt", 'r') as srt_file:
#             original_srt_content = srt_file.read()
#         original_subtitles = original_srt_content.strip().split('\n\n')
#         translated_subtitles = []
#         for subtitle in original_subtitles:
#             lines = subtitle.split('\n')
#             sequence_number = lines[0]
#             timestamp = lines[1]
#             original_text = '\n'.join(lines[2:])
#             translated_text = self.translate_text(original_text)
#             translated_subtitle = f"{sequence_number}\n{timestamp}\n{translated_text}"
#             translated_subtitles.append(translated_subtitle)
#         translated_srt_content = '\n\n'.join(translated_subtitles)
#         translated_srt_path = "translated_file.srt"
#         with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
#             srt_file.write(translated_srt_content)
#         # Loop through each translated subtitle and generate speech
#         translated_audio_paths = []
#         for subtitle in translated_subtitles:
#             lines = subtitle.split('\n')
#             sequence_number = lines[0]
#             timestamp = lines[1]
#             translated_text = '\n'.join(lines[2:])
#             translated_audio_path = f"translated_audio_{sequence_number}.wav"
#             self.text_to_speech(translated_text, self.txtospech_key, "southeastasia", translated_audio_path, self.speaking_rate)
#             translated_audio_paths.append(translated_audio_path)
#         # Create a list to store the audio clips
#         translated_audio_clips = []
#         # Loop through each translated audio path and create an AudioFileClip
#         for audio_path in translated_audio_paths:
#             translated_audio_clip = mp.AudioFileClip(audio_path)
#             translated_audio_clips.append(translated_audio_clip)
#         # Concatenate the translated audio clips into a single audio file
#         translated_audio = mp.concatenate_audioclips(translated_audio_clips)
#         # Define the output audio file path
#         output_audio_path = "translated_audio.wav"
#         # Write the concatenated translated audio to the output file
#         translated_audio.write_audiofile(output_audio_path)
#         # Load the original video
#         video = mp.VideoFileClip(self.video_path)
#         # Load the translated audio
#         translated_audio = mp.AudioFileClip(output_audio_path)
#         # Set the audio of the video to the translated audio
#         video = video.set_audio(translated_audio)
#         # Define the output video file path
#         output_video_path = "translated_video.mp4"
#         # Write the video with translated audio to the output file
#         video.write_videofile(output_video_path, codec="libx264", audio_codec="aac")
#         # Clean up temporary files
#         self.cleanup_temp_files()

#     def cleanup_temp_files(self):
#         temp_files = ["audio.wav", "t.txt", "transcript.srt", "translated_audio.wav", "translated_file.srt"] + [f"translated_audio_{i}.wav" for i in range(1, 100)]  # Adjust the range accordingly
#         for file in temp_files:
#             if os.path.exists(file):
#                 os.remove(file)
#                 print(f"Deleted {file}")
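

# A minimal usage sketch for the active class above. The file name
# "input_video.mp4" and the English-to-Spanish language pair are illustrative
# assumptions, not values taken from this module. Running the module directly
# extracts the audio track, transcribes it with AssemblyAI, translates the
# transcript with the Azure Translator API, and prints where the translated
# text file was written.
if __name__ == "__main__":
    translator = Translate(
        video_path="input_video.mp4",   # hypothetical input file
        original_language="English",    # spoken language of the video
        target_language="Spanish",      # language to translate the transcript into
    )
    translated_path = translator.transcribe_and_translate()
    print("Translated transcript saved to:", translated_path)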