# NOTE: page-header residue captured with this file ("Spaces: Sleeping"); not part of the program.
import os | |
import moviepy.editor as mp | |
import assemblyai as aai | |
import requests | |
import azure.cognitiveservices.speech as speechsdk | |
from moviepy.editor import AudioFileClip | |
from gradio_client import Client | |
class Translate:
    """Transcribe the speech in a video and translate the transcript.

    The pipeline (see ``transcribe_and_translate``) extracts the audio track
    with moviepy, transcribes it with AssemblyAI, and translates the plain-text
    transcript through the Microsoft Translator REST endpoint.  Intermediate
    and final artefacts are written to fixed file names in the current working
    directory: ``audio.wav``, ``transcript.srt``, ``t.txt`` and
    ``translated_text.txt``.
    """

    # Display name -> language code passed to the transcription config.
    _SOURCE_LANGUAGE_CODES = {
        'English': 'en',
        'German': 'de',
        'French': 'fr',
        'Spanish': 'es',
    }

    # Display name -> (locale code, translator "to" code).
    _TARGET_LANGUAGE_CODES = {
        'English': ('en-US', 'en'),
        'German': ('de-DE', 'de'),
        'French': ('fr-CA', 'fr'),
        'Spanish': ('es-ES', 'es'),
        'Urdu': ('ur-PK', 'ur'),
    }

    _TRANSLATOR_URL = "https://api.cognitive.microsofttranslator.com/translate"
    _TRANSLATOR_REGION = "southeastasia"
    # Seconds before the translation request is abandoned instead of hanging.
    _REQUEST_TIMEOUT = 60

    def __init__(self, video_path,original_language, target_language):
        """Store the job parameters and service credentials.

        Args:
            video_path: Path of the video file to process.
            original_language: Display name of the spoken language
                (e.g. ``'English'``).
            target_language: Display name of the language to translate into.
        """
        self.video_path = video_path
        self.target_language = target_language
        self.original_language = original_language

        # SECURITY: these keys were committed to source.  Environment variables
        # now take precedence; the literals remain only so existing deployments
        # keep working.  Rotate the keys and drop the fallbacks once the
        # environment is configured.
        self.aai_api_key = os.environ.get(
            "ASSEMBLYAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2")
        self.translation_api_key = os.environ.get(
            "AZURE_TRANSLATOR_KEY", "394833878dd54214886cd81a35ac35dc")
        self.spechtotxt_key = os.environ.get(
            "AZURE_SPEECH_KEY", "07ac642da789462d87ad47a790ec6d5f")

        # Defined up front so the getters never raise AttributeError when they
        # are called before (or regardless of) the language setters.
        self.lan_code = ''
        self.language_code = ''
        self.trans_code = ''
        self.voice_names = []

    def extract_audio(self):
        """Write the video's audio track to ``audio.wav`` and return that path.

        Raises:
            ValueError: If the video has no audio track.
        """
        # Kept from the original implementation: the AssemblyAI key is
        # configured here as well as in ``transcribe_audio``.
        aai.settings.api_key = self.aai_api_key
        audio_path = "audio.wav"
        video = mp.VideoFileClip(self.video_path)
        try:
            audio = video.audio
            if audio is None:
                raise ValueError(
                    f"No audio track found in video: {self.video_path}")
            audio.write_audiofile(audio_path)
        finally:
            # Release the underlying reader even if the export fails.
            video.close()
        print("Audio extracted successfully!")
        return audio_path

    def org_language_parameters(self,original_language):
        """Set ``self.lan_code`` for the spoken language ('' if unsupported)."""
        self.lan_code = self._SOURCE_LANGUAGE_CODES.get(original_language, '')

    def set_language_parameters(self, target_language):
        """Set ``self.language_code`` and ``self.trans_code`` for the target.

        Unsupported languages reset both codes to '' and ``voice_names`` to an
        empty list.
        """
        codes = self._TARGET_LANGUAGE_CODES.get(target_language)
        if codes is None:
            # Handle unsupported languages by falling back to empty values.
            self.voice_names = []
            self.language_code = ''
            self.trans_code = ''
        else:
            self.language_code, self.trans_code = codes
        print("Target Language:", target_language)
        print("Trans Code:", self.trans_code)

    def get_voice_names(self):
        """Return the current ``voice_names`` value."""
        return self.voice_names

    def get_language_code(self):
        """Return the locale code chosen by ``set_language_parameters``."""
        return self.language_code

    def transcribe_audio(self, audio_path):
        """Transcribe ``audio_path`` and write the transcript files.

        Writes the SRT subtitles to ``transcript.srt`` and the plain text to
        ``t.txt``, both as UTF-8 so they round-trip with the UTF-8 read in
        ``transcribe_and_translate`` regardless of the platform default.

        Raises:
            RuntimeError: If the transcription service reports an error.
        """
        aai.settings.api_key = self.aai_api_key
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        if transcript.status == aai.TranscriptStatus.error:
            raise RuntimeError(f"Transcription failed: {transcript.error}")
        with open("transcript.srt", "w", encoding="utf-8") as srt_file:
            srt_file.write(transcript.export_subtitles_srt())
        with open("t.txt", "w", encoding="utf-8") as text_file:
            # Guard against a missing text payload (e.g. silent audio).
            text_file.write(transcript.text or "")

    def translate_text(self, text):
        """Translate ``text`` into ``self.trans_code`` and return the result.

        Raises:
            requests.HTTPError: If the translation service returns an error
                status.
        """
        headers = {
            "Ocp-Apim-Subscription-Key": self.translation_api_key,
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": self._TRANSLATOR_REGION,
        }
        params = {
            "api-version": "3.0",
            "to": self.trans_code,
        }
        body = [{"text": text}]
        response = requests.post(
            self._TRANSLATOR_URL,
            headers=headers,
            params=params,
            json=body,
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def transcribe_and_translate(self):
        """Run the full pipeline and return the translated text file's path.

        Extracts the audio, transcribes it, translates the whole transcript in
        a single request and writes the result to ``translated_text.txt``.
        """
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.transcribe_audio(audio_path)
        self.set_language_parameters(self.target_language)

        # t.txt holds the plain-text transcript written by transcribe_audio.
        with open("t.txt", 'r', encoding='utf-8') as text_file:
            original_text = text_file.read()

        translated_text = self.translate_text(original_text)

        translated_text_path = "translated_text.txt"
        with open(translated_text_path, 'w', encoding='utf-8') as translated_file:
            translated_file.write(translated_text)
        print("Translation complete. Translated text saved to:", translated_text_path)
        return translated_text_path
# class Translate: | |
# def __init__(self, video_path, target_language,original_language,speaking_rate): | |
# self.video_path = video_path | |
# self.target_language = target_language | |
# self.original_language=original_language | |
# self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2" | |
# self.txtospech_key = "358c77527e48454cbf5bf2bd54f03161" | |
# self.translation_api_key = "394833878dd54214886cd81a35ac35dc" | |
# self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f" | |
# self.speaking_rate= speaking_rate | |
# self.print_parameters() | |
# def print_parameters(self): | |
# print("Video_Path" , self.video_path) | |
# print("original_language" , self.original_language) | |
# print("target_language" , self.target_language) | |
# print("speaking_rate" , self.speaking_rate) | |
# def extract_audio(self): | |
# aai.settings.api_key = self.aai_api_key | |
# video = mp.VideoFileClip(self.video_path) | |
# audio = video.audio | |
# audio_path = "audio.wav" | |
# audio.write_audiofile(audio_path) | |
# print("Audio extracted successfully!") | |
# return audio_path | |
# def gender_detection(self): | |
# # gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/" | |
# # gender_client = Client(gender_model_url) | |
# # gender = gender_client.predict( | |
# # 'audio.wav', api_name="/predict" | |
# # ) | |
# # print(gender) | |
# # return gender | |
# return "male" | |
# def org_language_parameters(self,original_language): | |
# if original_language == 'English': | |
# self.lan_code='en' | |
# elif original_language =='German': | |
# self.lan_code='de' | |
# elif original_language =='French': | |
# self.lan_code='fr' | |
# elif original_language =='Spanish': | |
# self.lan_code='es' | |
# else: | |
# self.lan_code = '' | |
# def set_language_parameters(self, target_language, detected_gender): | |
# if target_language == 'English': | |
# self.language_code = 'en-US' | |
# self.trans_code = 'en' | |
# self.voice_names = 'en-US-GuyNeural' if detected_gender == 'male' else 'en-US-AriaNeural' | |
# elif target_language == 'German': | |
# self.language_code = 'de-DE' | |
# self.trans_code = 'de' | |
# self.voice_names = 'de-DE-ConradNeural' if detected_gender == 'male' else 'de-DE-KatjaNeural' | |
# elif target_language == 'French': | |
# self.language_code = 'fr-CA' | |
# self.trans_code = 'fr' | |
# self.voice_names = 'fr-CA-JeanNeural' if detected_gender == 'male' else 'fr-CA-SylvieNeural' | |
# elif target_language == 'Spanish': | |
# self.language_code = 'es-ES' | |
# self.trans_code = 'es' | |
# self.voice_names = 'es-ES-AlvaroNeural' if detected_gender == 'male' else 'es-ES-ElviraNeural' | |
# elif target_language == 'Urdu': | |
# self.language_code = 'ur-PK' | |
# self.trans_code = 'ur' | |
# self.voice_names = 'ur-PK-AsadNeural' if detected_gender == 'male' else 'ur-PK-UzmaNeural' | |
# else: | |
# # Handle unsupported languages or set default values | |
# self.voice_names = [] | |
# self.language_code = '' | |
# self.trans_code = '' | |
# print("Target Language:", target_language) | |
# print("Trans Code:", self.trans_code) | |
# def get_voice_names(self): | |
# return self.voice_names | |
# def get_language_code(self): | |
# return self.language_code | |
# def get_audio_duration(self, audio_path): | |
# audio_clip = AudioFileClip(audio_path) | |
# audio_duration = audio_clip.duration | |
# return audio_duration | |
# def transcribe_audio(self, audio_path): | |
# aai.settings.api_key = self.aai_api_key | |
# config = aai.TranscriptionConfig(self.lan_code) | |
# transcriber = aai.Transcriber(config=config) | |
# transcript = transcriber.transcribe(audio_path) | |
# file_path = "transcript.srt" | |
# filepath = "t.txt" | |
# with open(file_path, "w") as file: | |
# file.write(transcript.export_subtitles_srt()) | |
# with open(filepath, "w") as file: | |
# file.write(transcript.text) | |
# def generate_ssml(self, text, speaking_rate): | |
# # Construct SSML with the given text, speaking rate, voice name, and language code | |
# return f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{self.language_code}"><voice name="{self.voice_names}"><prosody rate="{speaking_rate}">{text}</prosody></voice></speak>' | |
# def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate): | |
# ssml = self.generate_ssml(text, speaking_rate) | |
# speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion) | |
# audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file) | |
# speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config) | |
# speech_synthesizer.speak_ssml_async(ssml).get() | |
# def translate_text(self, text): | |
# base_url = "https://api.cognitive.microsofttranslator.com" | |
# endpoint = "/translate" | |
# headers = { | |
# "Ocp-Apim-Subscription-Key": self.translation_api_key, | |
# "Content-Type": "application/json", | |
# "Ocp-Apim-Subscription-Region": "southeastasia" | |
# } | |
# params = { | |
# "api-version": "3.0", | |
# "to": self.trans_code | |
# } | |
# body = [{"text": text}] | |
# response = requests.post(base_url + endpoint, headers=headers, params=params, json=body) | |
# response.raise_for_status() | |
# translation = response.json()[0]["translations"][0]["text"] | |
# return translation | |
# def transcribe_and_translate(self): | |
# audio_path = self.extract_audio() | |
# self.org_language_parameters(self.original_language) | |
# self.transcribe_audio(audio_path) | |
# gender = self.gender_detection() | |
# print("Detected Gender:", gender) | |
# self.set_language_parameters(self.target_language,gender) | |
# with open("transcript.srt", 'r') as srt_file: | |
# original_srt_content = srt_file.read() | |
# original_subtitles = original_srt_content.strip().split('\n\n') | |
# translated_subtitles = [] | |
# for subtitle in original_subtitles: | |
# lines = subtitle.split('\n') | |
# sequence_number = lines[0] | |
# timestamp = lines[1] | |
# original_text = '\n'.join(lines[2:]) | |
# translated_text = self.translate_text(original_text) | |
# translated_subtitle = f"{sequence_number}\n{timestamp}\n{translated_text}" | |
# translated_subtitles.append(translated_subtitle) | |
# translated_srt_content = '\n\n'.join(translated_subtitles) | |
# translated_srt_path = "translated_file.srt" | |
# with open(translated_srt_path, 'w', encoding='utf-8') as srt_file: | |
# srt_file.write(translated_srt_content) | |
# # Loop through each translated subtitle and generate speech | |
# translated_audio_paths = [] | |
# for subtitle in translated_subtitles: | |
# lines = subtitle.split('\n') | |
# sequence_number = lines[0] | |
# timestamp = lines[1] | |
# translated_text = '\n'.join(lines[2:]) | |
# translated_audio_path = f"translated_audio_{sequence_number}.wav" | |
# self.text_to_speech(translated_text, self.txtospech_key, "southeastasia", translated_audio_path, self.speaking_rate) | |
# translated_audio_paths.append(translated_audio_path) | |
# # Create a list to store the audio clips | |
# translated_audio_clips = [] | |
# # Loop through each translated audio path and create an AudioFileClip | |
# for audio_path in translated_audio_paths: | |
# translated_audio_clip = mp.AudioFileClip(audio_path) | |
# translated_audio_clips.append(translated_audio_clip) | |
# # Concatenate the translated audio clips into a single audio file | |
# translated_audio = mp.concatenate_audioclips(translated_audio_clips) | |
# # Define the output audio file path | |
# output_audio_path = "translated_audio.wav" | |
# # Write the concatenated translated audio to the output file | |
# translated_audio.write_audiofile(output_audio_path) | |
# # Load the original video | |
# video = mp.VideoFileClip(self.video_path) | |
# # Load the translated audio | |
# translated_audio = mp.AudioFileClip(output_audio_path) | |
# # Set the audio of the video to the translated audio | |
# video = video.set_audio(translated_audio) | |
# # Define the output video file path | |
# output_video_path = "translated_video.mp4" | |
# # Write the video with translated audio to the output file | |
# video.write_videofile(output_video_path, codec="libx264", audio_codec="aac") | |
# # Clean up temporary files | |
# self.cleanup_temp_files() | |
# def cleanup_temp_files(self): | |
# temp_files = ["audio.wav", "t.txt", "transcript.srt","translated_audio.wav","translated_file.srt"] + [f"translated_audio_{i}.wav" for i in range(1, 100)] # Adjust the range accordingly | |
# for file in temp_files: | |
# if os.path.exists(file): | |
# os.remove(file) | |
# print(f"Deleted {file}") | |