File size: 9,182 Bytes
ed2fbbc
 
 
 
 
 
 
84d1f79
ed2fbbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf0d89b
 
 
 
 
 
 
ed2fbbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cf0d89b
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import os
import moviepy.editor as mp
import assemblyai as aai
import requests
import azure.cognitiveservices.speech as speechsdk
from moviepy.editor import AudioFileClip
from gradio_client import Client

class Translate:
    """End-to-end video dubbing pipeline.

    Extracts the audio track from a video, transcribes it with AssemblyAI,
    detects the speaker's gender via a Hugging Face Space, translates the
    subtitles with the Azure Translator API, synthesizes translated speech
    with Azure Cognitive Services TTS, and writes a new video whose audio
    track is the translated narration.
    """

    # Human-readable source language -> AssemblyAI language code.
    _SOURCE_LANG_CODES = {
        'English': 'en',
        'German': 'de',
        'French': 'fr',
        'Spanish': 'es',
    }

    # Target language -> (Azure locale, translator code, male voice, female voice).
    _TARGET_LANG_PARAMS = {
        'English': ('en-US', 'en', 'en-US-GuyNeural', 'en-US-AriaNeural'),
        'German': ('de-DE', 'de', 'de-DE-ConradNeural', 'de-DE-KatjaNeural'),
        'French': ('fr-CA', 'fr', 'fr-CA-JeanNeural', 'fr-CA-SylvieNeural'),
        'Spanish': ('es-ES', 'es', 'es-ES-AlvaroNeural', 'es-ES-ElviraNeural'),
        'Urdu': ('ur-PK', 'ur', 'ur-PK-AsadNeural', 'ur-PK-UzmaNeural'),
    }

    def __init__(self, video_path, target_language, original_language, speaking_rate):
        """Store pipeline configuration.

        Args:
            video_path: Path of the input video file.
            target_language: Language to dub into (e.g. 'Urdu').
            original_language: Language spoken in the video (e.g. 'English').
            speaking_rate: Prosody rate passed to the TTS SSML.
        """
        self.video_path = video_path
        self.target_language = target_language
        self.original_language = original_language
        # SECURITY: these API keys were hard-coded in source control. Prefer
        # environment variables; the historical values remain as fallbacks
        # for backward compatibility, but they are exposed and should be
        # rotated.
        self.aai_api_key = os.environ.get(
            "AAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2")
        self.txtospech_key = os.environ.get(
            "AZURE_TTS_KEY", "358c77527e48454cbf5bf2bd54f03161")
        self.translation_api_key = os.environ.get(
            "AZURE_TRANSLATE_KEY", "394833878dd54214886cd81a35ac35dc")
        self.spechtotxt_key = os.environ.get(
            "AZURE_STT_KEY", "07ac642da789462d87ad47a790ec6d5f")
        self.speaking_rate = speaking_rate

    def extract_audio(self):
        """Extract the video's audio track to 'audio.wav' and return the path."""
        video = mp.VideoFileClip(self.video_path)
        try:
            audio_path = "audio.wav"
            video.audio.write_audiofile(audio_path)
        finally:
            # moviepy clips hold open file handles / ffmpeg subprocesses.
            video.close()
        print("Audio extracted successfully!")
        return audio_path

    def gender_detection(self, audio_path="audio.wav"):
        """Classify the speaker's gender from an audio file.

        Calls a remote Hugging Face Space; returns its prediction
        (consumed downstream as 'male' / anything-else-is-female).

        Args:
            audio_path: Audio file to classify (defaults to the file
                produced by extract_audio()).
        """
        gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/"
        gender_client = Client(gender_model_url)
        gender = gender_client.predict(audio_path, api_name="/predict")
        print(gender)
        return gender

    def org_language_parameters(self, original_language):
        """Set self.lan_code (AssemblyAI code) for the source language.

        Unsupported languages map to '' — the original fallback behavior.
        """
        self.lan_code = self._SOURCE_LANG_CODES.get(original_language, '')

    def set_language_parameters(self, target_language, detected_gender):
        """Set locale, translator code and TTS voice for the target language.

        Args:
            target_language: One of the supported target languages.
            detected_gender: 'male' selects the male neural voice; any
                other value selects the female voice.
        """
        params = self._TARGET_LANG_PARAMS.get(target_language)
        if params is not None:
            self.language_code, self.trans_code, male_voice, female_voice = params
            self.voice_names = male_voice if detected_gender == 'male' else female_voice
        else:
            # Unsupported target language: keep the original defaults.
            self.voice_names = []
            self.language_code = ''
            self.trans_code = ''

        print("Target Language:", target_language)
        print("Trans Code:", self.trans_code)

    def get_voice_names(self):
        """Return the selected TTS voice name."""
        return self.voice_names

    def get_language_code(self):
        """Return the selected Azure locale code (e.g. 'ur-PK')."""
        return self.language_code

    def get_audio_duration(self, audio_path):
        """Return the duration of an audio file in seconds."""
        audio_clip = AudioFileClip(audio_path)
        try:
            return audio_clip.duration
        finally:
            audio_clip.close()  # avoid leaking the underlying reader

    def transcribe_audio(self, audio_path):
        """Transcribe audio with AssemblyAI.

        Writes 'transcript.srt' (subtitles) and 't.txt' (plain text).
        Requires org_language_parameters() to have been called first so
        self.lan_code is set.
        """
        aai.settings.api_key = self.aai_api_key
        config = aai.TranscriptionConfig(self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        # encoding='utf-8' is required: transcripts/translations are not
        # guaranteed to be representable in the platform default encoding.
        with open("transcript.srt", "w", encoding="utf-8") as file:
            file.write(transcript.export_subtitles_srt())
        with open("t.txt", "w", encoding="utf-8") as file:
            file.write(transcript.text)

    def generate_ssml(self, text, speaking_rate):
        """Build an SSML document for Azure TTS.

        The text is XML-escaped so characters like '&' or '<' in a
        subtitle cannot break the SSML markup.
        """
        from xml.sax.saxutils import escape  # stdlib; only needed here
        return (
            f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
            f'xml:lang="{self.language_code}">'
            f'<voice name="{self.voice_names}">'
            f'<prosody rate="{speaking_rate}">{escape(text)}</prosody>'
            f'</voice></speak>'
        )

    def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate):
        """Synthesize text to a WAV file with Azure Cognitive Services TTS."""
        ssml = self.generate_ssml(text, speaking_rate)
        speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion)
        audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config)
        # .get() blocks until synthesis completes so the file is fully written.
        speech_synthesizer.speak_ssml_async(ssml).get()

    def translate_text(self, text):
        """Translate text to self.trans_code via the Azure Translator API.

        Raises:
            requests.HTTPError: if the API call fails.
        """
        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            "Ocp-Apim-Subscription-Key": self.translation_api_key,
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia"
        }
        params = {
            "api-version": "3.0",
            "to": self.trans_code
        }
        body = [{"text": text}]

        response = requests.post(
            base_url + endpoint, headers=headers, params=params, json=body)
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    def transcribe_and_translate(self):
        """Run the full dubbing pipeline and write 'translated_video.mp4'."""
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.transcribe_audio(audio_path)
        gender = self.gender_detection(audio_path)
        print("Detected Gender:", gender)
        self.set_language_parameters(self.target_language, gender)

        with open("transcript.srt", 'r', encoding="utf-8") as srt_file:
            original_srt_content = srt_file.read()

        # SRT entries are separated by blank lines:
        # <sequence>\n<timestamp>\n<text...>
        original_subtitles = original_srt_content.strip().split('\n\n')

        translated_subtitles = []
        for subtitle in original_subtitles:
            lines = subtitle.split('\n')
            sequence_number = lines[0]
            timestamp = lines[1]
            original_text = '\n'.join(lines[2:])
            translated_text = self.translate_text(original_text)
            translated_subtitles.append(
                f"{sequence_number}\n{timestamp}\n{translated_text}")

        translated_srt_path = "translated_file.srt"
        with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write('\n\n'.join(translated_subtitles))

        # Synthesize one audio clip per translated subtitle, remembering the
        # exact paths so cleanup_temp_files() need not guess them.
        translated_audio_paths = []
        for subtitle in translated_subtitles:
            lines = subtitle.split('\n')
            sequence_number = lines[0]
            translated_text = '\n'.join(lines[2:])
            translated_audio_path = f"translated_audio_{sequence_number}.wav"
            self.text_to_speech(
                translated_text, self.txtospech_key, "southeastasia",
                translated_audio_path, self.speaking_rate)
            translated_audio_paths.append(translated_audio_path)
        self._generated_audio_files = list(translated_audio_paths)

        # Concatenate the per-subtitle clips into one continuous track.
        translated_audio_clips = [mp.AudioFileClip(p) for p in translated_audio_paths]
        translated_audio = mp.concatenate_audioclips(translated_audio_clips)
        output_audio_path = "translated_audio.wav"
        translated_audio.write_audiofile(output_audio_path)
        for clip in translated_audio_clips:
            clip.close()

        # Replace the original video's audio with the translated track.
        video = mp.VideoFileClip(self.video_path)
        dubbed_audio = mp.AudioFileClip(output_audio_path)
        try:
            video_with_audio = video.set_audio(dubbed_audio)
            video_with_audio.write_videofile(
                "translated_video.mp4", codec="libx264", audio_codec="aac")
        finally:
            dubbed_audio.close()
            video.close()

        self.cleanup_temp_files()

    def cleanup_temp_files(self):
        """Delete intermediate artifacts created by the pipeline.

        Uses the per-subtitle file list recorded by transcribe_and_translate()
        when available; otherwise falls back to the original name-guessing
        heuristic (which missed files beyond 99 subtitles).
        """
        generated = getattr(self, "_generated_audio_files", None)
        if generated is None:
            generated = [f"translated_audio_{i}.wav" for i in range(1, 100)]
        temp_files = ["audio.wav", "t.txt", "transcript.srt",
                      "translated_audio.wav", "translated_file.srt"] + list(generated)
        for file in temp_files:
            if os.path.exists(file):
                os.remove(file)
                print(f"Deleted {file}")