File size: 10,950 Bytes
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b1563fc
 
 
 
 
 
 
ee47056
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee47056
 
 
0a26c6f
 
 
 
ee47056
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee47056
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c82708e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
c82708e
 
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c82708e
 
 
 
0a26c6f
 
 
 
8b49f6d
 
ee47056
0a26c6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8b49f6d
 
0a26c6f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
import gradio as gr
from subtitle import subtitle_maker,LANGUAGE_CODE
# Languages pinned to the top of both dropdowns, ahead of the full list.
_PINNED_LANGUAGES = ["English", "Hindi", "Bengali"]
available_language = LANGUAGE_CODE.keys()

# dict.fromkeys() drops repeats while keeping first-seen order, so a pinned
# language that is also a LANGUAGE_CODE key is offered only once.
source_lang_list = list(dict.fromkeys(["Automatic", *_PINNED_LANGUAGES, *available_language]))

target_lang_list = list(dict.fromkeys([*_PINNED_LANGUAGES, *available_language]))


def update_target_lang(selected_src):
    """Return the target-language value that should follow a source change.

    "Automatic" maps to "English"; any other selection is returned unchanged.
    """
    return "English" if selected_src == "Automatic" else selected_src


def ui1():
    """Build the subtitle-generation tab.

    The left column holds the media upload, the source-language dropdown, the
    generate button and (inside an accordion) the translation target. The
    right column holds the generated files and a transcript preview.

    Returns:
        The assembled ``gr.Blocks`` app.
    """
    with gr.Blocks() as demo:
        gr.HTML("""
        <div style="text-align: center; margin: 20px auto; max-width: 800px;">
            <h1 style="font-size: 2.5em; margin-bottom: 10px;">🎬 Auto Subtitle Generator</h1>
            <p style="font-size: 1.2em; color: #555; margin-bottom: 15px;">If you have a large video, upload the audio instead, it's much faster to upload.</p>
            <a href="https://github.com/NeuralFalconYT/Auto-Subtitle-Generator-Free" target="_blank" style="display: inline-block; padding: 10px 20px; background-color: #4285F4; color: white; border-radius: 6px; text-decoration: none; font-size: 1em;">😇 Run on Google Colab</a>
        </div>
        """)

        with gr.Row():
            with gr.Column():
                upload_media = gr.File(label="Upload Audio or Video File")
                input_lang = gr.Dropdown(label="Source Language", choices=source_lang_list, value="English")
                generate_btn = gr.Button("🚀 Generate Subtitle", variant="primary")
                with gr.Accordion("⚙️ Translation using Google Translator", open=False):
                    output_lang = gr.Dropdown(label="Translate Into", choices=target_lang_list, value="English")

            with gr.Column():
                default_srt = gr.File(label="🎯 Original Subtitles (Default Generated by whisper)")
                customized_srt = gr.File(label="📝 Readable Subtitles (Multi-line)")
                word_level_srt = gr.File(label="🔠 Word-by-Word Subtitles")

                with gr.Accordion("🌍 Other Subtitle Formats", open=False):
                    translated_srt = gr.File(label="🌐 Translated Subtitles")
                    shorts_srt = gr.File(label="📱 Shorts/Reels Subtitles")
                    transcript_txt = gr.File(label="📄 Full Transcript (Text File)")
                    subtitle_json = gr.File(label="📄 Full Transcript (JSON File) To make .ass file")
                    word_json = gr.File(label="📄 Shorts Transcript (JSON File) To make .ass file")
                    transcript_box = gr.Textbox(label="🗒️ Transcript Preview", lines=4, show_copy_button=True)

        # The output order here is the order in which subtitle_maker's return
        # values are assigned to components; it intentionally differs from
        # the on-screen layout order above.
        generate_btn.click(
            fn=subtitle_maker,
            inputs=[upload_media, input_lang, output_lang],
            outputs=[
                default_srt,
                translated_srt,
                customized_srt,
                word_level_srt,
                shorts_srt,
                transcript_txt,
                subtitle_json,
                word_json,
                transcript_box,
            ],
        )

        # Changing the source language also resets the translation target.
        input_lang.change(
            fn=update_target_lang,
            inputs=input_lang,
            outputs=output_lang,
        )

    return demo



def prompt_translation(language):
    """
    Build the instruction block asking an LLM to translate an .srt file for dubbing.

    Args:
        language: Target language name, interpolated into the instructions.

    Returns:
        The prompt text. It requests a JSON object keyed by subtitle sequence
        number whose entries carry "timestamp", "actual subtitle text" and
        "dubbing" fields.
    """
    prompt = f"""
-------------- You are a professional subtitle translator for **video dubbing**.
Translate the following `.srt` subtitle file into **{language}** while preserving timing, meaning, and emotional tone.

Output in JSON format exactly like this:

```json
{{
  "subtitle sequence number": {{
    "timestamp": "original timestamp",
    "actual subtitle text": "original subtitle line",
    "dubbing": "natural, dubbing-friendly {language} translation"
  }}
}}
```

**Guidelines for Translation:**

1. **Understand the full context** before translating — read the entire subtitle file first.
2. Translate into **natural, conversational {language}**, not a direct word-for-word translation.
3. Keep translations **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt


def prompt_fix_grammar(language="English"):
    """
    Build the instruction block asking an LLM to fix grammar in an .srt file for dubbing.

    Args:
        language: Language the subtitles are in and must stay in; it is
            interpolated into the instructions.

    Returns:
        The prompt text. It requests a JSON object keyed by subtitle sequence
        number whose entries carry "timestamp", "actual subtitle text" and
        "dubbing" fields.
    """
    prompt = f"""
-------------- You are a professional subtitle editor for **video dubbing**.
Fix the grammar, spelling, and awkward phrasing in the following `.srt` subtitle file while preserving timing, meaning, and emotional tone.
Do NOT translate — keep everything in {language}.

Output in JSON format exactly like this:

```json
{{
"subtitle sequence number": {{
"timestamp": "original timestamp",
"actual subtitle text": "original {language} subtitle line",
"dubbing": "natural, dubbing-friendly corrected {language} line"
}}
}}
```

**Guidelines for Grammar Fixing:**

1.  **Understand the full context** before editing — read the entire subtitle file first.
2.  Correct grammar, spelling, and phrasing errors while keeping the same meaning.
3.  Keep corrections **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt


def prompt_srt_to_romanized(language="Hindi"):
    """
    Build the instruction block asking an LLM to romanize an .srt file.

    The instructions ask for the subtitle text to be rewritten in Latin
    letters while timestamps, meaning, punctuation and formatting stay as
    they are.

    Args:
        language: Language of the subtitles, interpolated into the instructions.

    Returns:
        The prompt text. It requests a JSON object keyed by subtitle sequence
        number whose entries carry "timestamp", "original subtitle text" and
        "dubbing" fields.
    """
    prompt = f"""
-------------- You are a professional subtitle editor tasked with converting subtitles to Romanized text.
Your task is to convert a `.srt` subtitle file from {language} to **Romanized {language}**,
keeping everything exactly the same except using Latin letters for all words.

**Instructions:**
1. Preserve the original timestamp of each subtitle.
2. Keep the original meaning, punctuation, and formatting intact.
3. Convert **only the original subtitle text** to Roman letters, word by word.
4. Do not add, remove, or change any words.
5. Output in strict JSON format exactly like this:

```json
{{
"subtitle sequence number": {{
"timestamp": "original timestamp",
"original subtitle text": "original {language} subtitle line",
"dubbing": "Romanized, {language} line of original subtitle text"
}}
}}
```

Focus entirely on **accurate Romanization**; do not modify anything else.
"""
    return prompt



import pysrt

def prompt_maker(srt_path, target_language, task="Translation"):
    """Build an LLM prompt from an .srt file and save it next to the source.

    Every cue is rendered as index / "start --> end" / text, followed by
    exactly one instruction block for the requested task. The same text is
    written to a ``.txt`` file beside the ``.srt`` and returned.

    Args:
        srt_path: Path of the subtitle file to read.
        target_language: Language name interpolated into the instruction block.
        task: "Translation" or "Romanization"; any other value selects the
            grammar-fix instructions.

    Returns:
        A ``(prompt_text, srt_path)`` tuple; the path is handed back unchanged.
    """
    import os  # local import keeps this function self-contained

    # splitext swaps only the final extension, so a path without a lowercase
    # ".srt" suffix can no longer resolve to the source file and overwrite it.
    stem, _ = os.path.splitext(srt_path)
    txt_path = f"{stem}.txt"
    subs = pysrt.open(srt_path, encoding='utf-8')

    # One mutually exclusive chain: each task gets a single instruction block.
    if task == "Translation":
        instructions = prompt_translation(target_language)
    elif task == "Romanization":
        instructions = prompt_srt_to_romanized(target_language)
    else:
        instructions = prompt_fix_grammar(target_language)

    cues = "".join(
        f"{sub.index}\n{sub.start} --> {sub.end}\n{sub.text}\n\n" for sub in subs
    )
    content = cues + instructions

    with open(txt_path, 'w', encoding='utf-8') as f:
        f.write(content)

    # return both prompt text and original path
    return content, srt_path




import pysrt
import json 
import os 
def json_to_srt(json_script, srt_path):
    """
    Convert dubbing-friendly JSON back into .srt
    Uses original srt_path to name output <name>_dubbing.srt

    Args:
        json_script: Either a mapping of entries, or a JSON string encoding
            one. A string may be wrapped in a Markdown code fence or
            surrounded by other text; only the outermost ``{...}`` is parsed.
            Each entry must provide "timestamp" and "dubbing".
        srt_path: Path of the original .srt; only its base name is used.

    Returns:
        Path of the file written under ``./dubbing_srt``.

    Raises:
        json.JSONDecodeError: If a string ``json_script`` is not valid JSON.
        KeyError: If an entry lacks "timestamp" or "dubbing".
    """
    os.makedirs("./dubbing_srt", exist_ok=True)

    name_no_ext, _ = os.path.splitext(os.path.basename(srt_path))
    output_srt_path = os.path.join("./dubbing_srt", f"{name_no_ext}_dubbing.srt")

    # Load JSON
    if isinstance(json_script, str):
        # The prompts ask the model to answer inside a ```json fence, so the
        # pasted text usually carries that fence; keep just the JSON object.
        start, end = json_script.find("{"), json_script.rfind("}")
        if 0 <= start < end:
            json_script = json_script[start:end + 1]
        json_object = json.loads(json_script)
    else:
        json_object = json_script

    # Write to file; cues are renumbered from 1 in the mapping's order,
    # whatever the keys are.
    with open(output_srt_path, "w", encoding="utf-8") as f:
        for i, value in enumerate(json_object.values(), start=1):
            f.write(f"{i}\n{value['timestamp']}\n{value['dubbing']}\n\n")

    return output_srt_path




def ui2():
    """Build the LLM-assisted subtitle translation tab.

    Step 1 (left column) turns an uploaded .srt into a prompt via
    ``prompt_maker``. Step 2 (right column) turns the pasted JSON answer back
    into an .srt via ``json_to_srt``. The original .srt path is carried from
    step 1 to step 2 in a ``gr.State``.

    Returns:
        The assembled ``gr.Blocks`` app.
    """
    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 32px;'>🎬 Subtitle Translation Using LLM</h1></center>")

        # hidden state to keep original srt path
        srt_state = gr.State("")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Step 1: Generate Prompt")
                srt_file = gr.File(label="Upload .srt file generated by Whisper", file_types=[".srt"])
                task = gr.Dropdown(
                    ["Translation", "Romanization", "Fix Grammar [English to English for dubbing]"],
                    label="Select Task",
                    value="Translation",
                )
                language = gr.Dropdown(
                    target_lang_list,
                    label="Select the language you want to translate into",
                    value="English",
                )
                generate_btn = gr.Button("Generate Prompt")
                output_prompt = gr.Textbox(
                    label="Copy & Paste this prompt in  https://aistudio.google.com/",
                    lines=20,
                    show_copy_button=True,
                )

            with gr.Column():
                gr.Markdown("### Step 2: Paste JSON & Convert Back to SRT")
                json_input = gr.Textbox(
                    label="Paste JSON script from https://aistudio.google.com/ ",
                    lines=20,
                    placeholder="Paste the JSON output here...",
                )
                convert_btn = gr.Button("Convert JSON → SRT")
                srt_file_out = gr.File(label="Download new .srt")

        # Button actions
        # prompt_maker returns (prompt, srt_path); the path is stored in state.
        generate_btn.click(
            fn=prompt_maker,
            inputs=[srt_file, language, task],
            outputs=[output_prompt, srt_state],
        )

        # The stored path names the converted output file.
        convert_btn.click(
            fn=json_to_srt,
            inputs=[json_input, srt_state],
            outputs=srt_file_out,
        )

    return demo




import click
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(share, debug):
    # No docstring on purpose: click would show it as the command's help text.
    # Alternative signature kept for reference: def main(debug=True, share=True):
    #
    # Build both tabs, wrap them in one tabbed app, then queue and launch it
    # with the two CLI flags forwarded to launch().
    tabs = [ui1(), ui2()]
    tab_titles = ["Generate SRT File", "SRT Translation"]
    custom_css = """.gradio-container { font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif; }"""
    app = gr.TabbedInterface(
        tabs,
        tab_titles,
        title="",
        theme=gr.themes.Soft(),
        css=custom_css,
    )
    app.queue().launch(share=share, debug=debug)
# Start the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()