File size: 9,704 Bytes
b293d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import os
import time
import traceback

import gradio as gr

from gui.asset_components import AssetComponentsUtils
from gui.ui_abstract_component import AbstractComponentUI
from gui.ui_components_html import GradioComponentsHTML
from shortGPT.audio.edge_voice_module import EdgeTTSVoiceModule
from shortGPT.audio.eleven_voice_module import ElevenLabsVoiceModule
from shortGPT.audio.coqui_voice_module import CoquiVoiceModule
from shortGPT.config.api_db import ApiKeyManager
from shortGPT.config.languages import (EDGE_TTS_VOICENAME_MAPPING,
                                       ELEVEN_SUPPORTED_LANGUAGES,
                                       COQUI_SUPPORTED_LANGUAGES,
                                       LANGUAGE_ACRONYM_MAPPING,
                                          Language)
from shortGPT.engine.multi_language_translation_engine import MultiLanguageTranslationEngine


class VideoTranslationUI(AbstractComponentUI):
    def __init__(self, shortGptUI: gr.Blocks):
        self.shortGptUI = shortGptUI
        self.eleven_language_choices = [lang.value.upper() for lang in ELEVEN_SUPPORTED_LANGUAGES]
        self.coqui_language_choices = [lang.value.upper() for lang in COQUI_SUPPORTED_LANGUAGES]
        self.embedHTML = '<div style="display: flex; overflow-x: auto; gap: 20px;">'
        self.progress_counter = 0
        self.video_translation_ui = None

    def create_ui(self):
        with gr.Row(visible=False) as video_translation_ui:
            with gr.Column():
                videoType = gr.Radio(["Youtube link", "Video file"], label="Input your video", value="Youtube link", interactive=True)
                video_path = gr.Video(source="upload", interactive=True, width=533.33, height=300, visible=False)
                yt_link = gr.Textbox(label="Youtube link (https://youtube.com/xyz): ", interactive=True, visible=False)
                videoType.change(lambda x: (gr.update(visible=x == "Video file"), gr.update(visible=x == "Youtube link")), [videoType], [video_path, yt_link])
                tts_engine = gr.Radio([AssetComponentsUtils.ELEVEN_TTS, AssetComponentsUtils.EDGE_TTS, AssetComponentsUtils.COQUI_TTS], label="Text to speech engine", value=AssetComponentsUtils.ELEVEN_TTS, interactive=True)

                with gr.Column(visible=True) as eleven_tts:
                    language_eleven = gr.CheckboxGroup(self.eleven_language_choices, label="Language", value="ENGLISH", interactive=True)
                    voice_eleven = AssetComponentsUtils.voiceChoiceTranslation(provider=AssetComponentsUtils.ELEVEN_TTS)
                with gr.Column(visible=False) as edge_tts:
                    language_edge = gr.CheckboxGroup([lang.value.upper() for lang in Language], label="Language", value="ENGLISH", interactive=True)
                with gr.Column(visible=False) as coqui_tts:
                    language_coqui = gr.CheckboxGroup(self.coqui_language_choices, label="Language", value="ENGLISH", interactive=True)
                    voice_coqui = AssetComponentsUtils.voiceChoiceTranslation(provider=AssetComponentsUtils.COQUI_TTS)
                tts_engine.change(lambda x: (gr.update(visible=x == AssetComponentsUtils.ELEVEN_TTS), gr.update(visible=x == AssetComponentsUtils.EDGE_TTS), gr.update(visible=x == AssetComponentsUtils.COQUI_TTS)), [tts_engine], [eleven_tts, edge_tts, coqui_tts])

                useCaptions = gr.Checkbox(label="Caption video", value=False)

                translateButton = gr.Button(label="Create Shorts")

                generation_error = gr.HTML(visible=False)
                video_folder = gr.Button("πŸ“", visible=True)
                output = gr.HTML()

            video_folder.click(lambda _: AssetComponentsUtils.start_file(os.path.abspath("videos/")))
            translateButton.click(self.inspect_create_inputs, inputs=[videoType, video_path, yt_link, tts_engine, language_eleven, language_edge, ], outputs=[generation_error]).success(self.translate_video, inputs=[
                videoType, yt_link, video_path, tts_engine, language_eleven, language_edge, language_coqui, useCaptions, voice_eleven, voice_coqui
            ], outputs=[output, video_folder, generation_error])
        self.video_translation_ui = video_translation_ui
        return self.video_translation_ui

    def translate_video(self, videoType, yt_link, video_path, tts_engine, language_eleven, language_edge, language_coqui, use_captions: bool, voice_eleven: str,voice_coqui:str, progress=gr.Progress()) -> str:
        if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
            languages = [Language(lang.lower().capitalize()) for lang in language_eleven]
        elif tts_engine == AssetComponentsUtils.EDGE_TTS:
            languages = [Language(lang.lower().capitalize()) for lang in language_edge]
        elif tts_engine == AssetComponentsUtils.COQUI_TTS:
            languages = [Language(lang.lower().capitalize()) for lang in language_coqui]
        try:
            for i, language in enumerate(languages):
                if tts_engine == AssetComponentsUtils.EDGE_TTS:
                    voice_module = EdgeTTSVoiceModule(EDGE_TTS_VOICENAME_MAPPING[language]['male'])
                if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
                    voice_module = ElevenLabsVoiceModule(ApiKeyManager.get_api_key('ELEVEN LABS'), voice_eleven, checkElevenCredits=True)
                if tts_engine == AssetComponentsUtils.COQUI_TTS:
                    voice_module = CoquiVoiceModule(voice_coqui, LANGUAGE_ACRONYM_MAPPING[language])
                content_translation_engine = MultiLanguageTranslationEngine(voiceModule=voice_module, src_url=yt_link if videoType == "Youtube link" else video_path, target_language=language, use_captions=use_captions)
                num_steps = content_translation_engine.get_total_steps()
                def logger(prog_str):
                    progress(self.progress_counter / (num_steps), f"Translating your video ({i+1}/{len(languages)}) - {prog_str}")
                content_translation_engine.set_logger(logger)

                for step_num, step_info in content_translation_engine.makeContent():
                    progress(self.progress_counter / (num_steps), f"Translating your video ({i+1}/{len(languages)}) - {step_info}")
                    self.progress_counter += 1

                video_path = content_translation_engine.get_video_output_path()
                current_url = self.shortGptUI.share_url+"/" if self.shortGptUI.share else self.shortGptUI.local_url
                file_url_path = f"{current_url}file={video_path}"
                file_name = video_path.split("/")[-1].split("\\")[-1]
                self.embedHTML += f'''
                <div style="display: flex; flex-direction: column; align-items: center;">
                    <video width="{500}"  style="max-height: 100%;" controls>
                        <source src="{file_url_path}" type="video/mp4">
                        Your browser does not support the video tag.
                    </video>
                    <a href="{file_url_path}" download="{file_name}" style="margin-top: 10px;">
                        <button style="font-size: 1em; padding: 10px; border: none; cursor: pointer; color: white; background: #007bff;">Download Video</button>
                    </a>
                </div>'''
                yield "<div>"+self.embedHTML + '</div>', gr.Button.update(visible=True), gr.update(visible=False)

        except Exception as e:
            traceback_str = ''.join(traceback.format_tb(e.__traceback__))
            error_name = type(e).__name__.capitalize() + " : " + f"{e.args[0]}"
            print("Error", traceback_str)
            error_html = GradioComponentsHTML.get_html_error_template().format(error_message=error_name, stack_trace=traceback_str)
            return self.embedHTML + '</div>', gr.Button.update(visible=True), gr.update(value=error_html, visible=True)

    def inspect_create_inputs(self, videoType, video_path, yt_link,  tts_engine, language_eleven, language_edge,):
        supported_extensions = ['.mp4', '.avi', '.mov']  # Add more supported video extensions if needed
        print(videoType, video_path, yt_link)
        if videoType == "Youtube link":
            if not yt_link.startswith("https://youtube.com/") and not yt_link.startswith("https://www.youtube.com/"):
                raise gr.Error('Invalid YouTube URL. Please provide a valid URL. Link example: https://www.youtube.com/watch?v=dQw4w9WgXcQ')
        else:
            if not video_path or not os.path.exists(video_path):
                raise gr.Error('You must drag and drop a valid video file.')

            file_ext = os.path.splitext(video_path)[-1].lower()
            if file_ext not in supported_extensions:
                raise gr.Error('Invalid video file. Supported video file extensions are: {}'.format(', '.join(supported_extensions)))
        if tts_engine == AssetComponentsUtils.ELEVEN_TTS:
            if not len(language_eleven) >0:
                raise gr.Error('You must select one or more target languages')
        if tts_engine == AssetComponentsUtils.EDGE_TTS:
            if not len(language_edge) >0:
                raise gr.Error('You must select one or more target languages')
        return gr.update(visible=False)


def update_progress(progress, progress_counter, num_steps, num_shorts, stop_event):
    start_time = time.time()
    while not stop_event.is_set():
        elapsed_time = time.time() - start_time
        dynamic = int(3649 * elapsed_time / 600)
        progress(progress_counter / (num_steps * num_shorts), f"Rendering progress - {dynamic}/3649")
        time.sleep(0.1)  # update every 0.1 second