|
import gradio as gr |
|
from subtitle import subtitle_maker,LANGUAGE_CODE |
|
# Language names known to the subtitle backend (the keys of LANGUAGE_CODE).
available_language = LANGUAGE_CODE.keys()

# A few languages are pinned to the top of each dropdown. dict.fromkeys keeps
# first-seen order while dropping repeats, so a pinned name that is also a
# LANGUAGE_CODE key is not listed a second time further down.
# NOTE(review): presumably LANGUAGE_CODE contains the pinned names - confirm.
source_lang_list = list(
    dict.fromkeys(["Automatic", "English", "Hindi", "Bengali", *available_language])
)

target_lang_list = list(
    dict.fromkeys(["English", "Hindi", "Bengali", *available_language])
)
|
|
|
|
|
def update_target_lang(selected_src):
    """Return the target-language value that should follow a source change.

    "Automatic" maps to "English"; any other selection is returned unchanged.
    """
    return "English" if selected_src == "Automatic" else selected_src
|
|
|
|
|
def ui1():
    """Build the subtitle-generation tab.

    An input column (media upload, source language, generate button and a
    collapsed translation-target picker) sits next to a results column of
    downloadable files. The generate button calls ``subtitle_maker`` and a
    change of source language calls ``update_target_lang`` to refresh the
    translation target.

    Returns:
        The assembled ``gr.Blocks`` object.
    """
    header_html = """
        <div style="text-align: center; margin: 20px auto; max-width: 800px;">
            <h1 style="font-size: 2.5em; margin-bottom: 10px;">π¬ Auto Subtitle Generator</h1>
            <p style="font-size: 1.2em; color: #555; margin-bottom: 15px;">If you have a large video, upload the audio instead, it's much faster to upload.</p>
            <a href="https://github.com/NeuralFalconYT/Auto-Subtitle-Generator-Free" target="_blank" style="display: inline-block; padding: 10px 20px; background-color: #4285F4; color: white; border-radius: 6px; text-decoration: none; font-size: 1em;">π Run on Google Colab</a>
        </div>
        """

    with gr.Blocks() as demo:
        gr.HTML(header_html)

        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column():
                media_in = gr.File(label="Upload Audio or Video File")
                src_lang = gr.Dropdown(
                    label="Source Language",
                    choices=source_lang_list,
                    value="English",
                )
                run_btn = gr.Button("π Generate Subtitle", variant="primary")
                with gr.Accordion("βοΈ Translation using Google Translator", open=False):
                    dst_lang = gr.Dropdown(
                        label="Translate Into",
                        choices=target_lang_list,
                        value="English",
                    )

            # Right column: generated artefacts.
            with gr.Column():
                srt_default = gr.File(label="π― Original Subtitles (Default Generated by whisper)")
                srt_readable = gr.File(label="π Readable Subtitles (Multi-line)")
                srt_words = gr.File(label="π  Word-by-Word Subtitles")

                # NOTE(review): nesting below is reconstructed from syntax;
                # confirm the preview box belongs inside this accordion.
                with gr.Accordion("π Other Subtitle Formats", open=False):
                    srt_translated = gr.File(label="π Translated Subtitles")
                    srt_shorts = gr.File(label="π± Shorts/Reels Subtitles")
                    txt_transcript = gr.File(label="π Full Transcript (Text File)")
                    json_subtitle = gr.File(label="π Full Transcript (JSON File) To make .ass file")
                    json_words = gr.File(label="π Shorts Transcript (JSON File) To make .ass file")
                    preview_box = gr.Textbox(
                        label="ποΈ Transcript Preview",
                        lines=4,
                        show_copy_button=True,
                    )

        # Output order must match the order of values subtitle_maker returns.
        result_widgets = [
            srt_default,
            srt_translated,
            srt_readable,
            srt_words,
            srt_shorts,
            txt_transcript,
            json_subtitle,
            json_words,
            preview_box,
        ]
        run_btn.click(
            fn=subtitle_maker,
            inputs=[media_in, src_lang, dst_lang],
            outputs=result_widgets,
        )

        # Keep the translation target in step with the chosen source language.
        src_lang.change(fn=update_target_lang, inputs=src_lang, outputs=dst_lang)

    return demo
|
|
|
|
|
|
|
def prompt_translation(language):
    """Return the LLM instructions for a dubbing-friendly subtitle translation.

    The text is meant to be appended after the subtitle entries; it asks the
    model to answer with one JSON object per subtitle carrying "timestamp",
    "actual subtitle text" and "dubbing" fields.

    Args:
        language: Name of the language to translate into; interpolated into
            the prompt text.

    Returns:
        The prompt as a string, starting with a newline and a dashed
        separator line.
    """
    # Doubled braces are literal braces in the f-string (the JSON example).
    prompt = f"""
-------------- You are a professional subtitle translator for **video dubbing**.
Translate the following `.srt` subtitle file into **{language}** while preserving timing, meaning, and emotional tone.

Output in JSON format exactly like this:

```json
{{
  "subtitle sequence number": {{
    "timestamp": "original timestamp",
    "actual subtitle text": "original English subtitle line",
    "dubbing": "natural, dubbing-friendly {language} translation"
  }}
}}
```

**Guidelines for Translation:**

1. **Understand the full context** before translating — read the entire subtitle file first.
2. Translate into **natural, conversational {language}**, not a direct word-for-word translation.
3. Keep translations **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt
|
|
|
|
|
def prompt_fix_grammar(language="English"):
    """Return the LLM instructions for a same-language grammar clean-up.

    The text is meant to be appended after the subtitle entries; it asks the
    model to correct grammar, spelling and phrasing without translating, and
    to answer with one JSON object per subtitle carrying "timestamp",
    "actual subtitle text" and "dubbing" fields.

    Args:
        language: Name of the language the subtitles are in and must stay in;
            interpolated into the prompt text.

    Returns:
        The prompt as a string, starting with a newline and a dashed
        separator line.
    """
    # Doubled braces are literal braces in the f-string (the JSON example).
    prompt = f"""
-------------- You are a professional subtitle editor for **video dubbing**.
Fix the grammar, spelling, and awkward phrasing in the following `.srt` subtitle file while preserving timing, meaning, and emotional tone.
Do NOT translate — keep everything in {language}.

Output in JSON format exactly like this:

```json
{{
  "subtitle sequence number": {{
    "timestamp": "original timestamp",
    "actual subtitle text": "original {language} subtitle line",
    "dubbing": "natural, dubbing-friendly corrected {language} line"
  }}
}}
```

**Guidelines for Grammar Fixing:**

1. **Understand the full context** before editing — read the entire subtitle file first.
2. Correct grammar, spelling, and phrasing errors while keeping the same meaning.
3. Keep corrections **roughly similar in length** to the original so lip movements sync naturally.
"""
    return prompt
|
|
|
|
|
def prompt_srt_to_romanized(language="Hindi"):
    """Return the LLM instructions for romanizing subtitles.

    The text is meant to be appended after the subtitle entries; it asks the
    model to rewrite each subtitle line in Latin letters without changing
    words, timing, punctuation or formatting, and to answer with one JSON
    object per subtitle carrying "timestamp", "original subtitle text" and
    "dubbing" fields.

    Args:
        language: Name of the language the subtitles are written in;
            interpolated into the prompt text.

    Returns:
        The prompt as a string, starting with a newline and a dashed
        separator line.
    """
    # Doubled braces are literal braces in the f-string (the JSON example).
    # The example is wrapped in a matching pair of three-backtick fences.
    prompt = f"""
-------------- You are a professional subtitle editor tasked with converting subtitles to Romanized text.
Your task is to convert a `.srt` subtitle file from {language} to **Romanized {language}**,
keeping everything exactly the same except using Latin letters for all words.

**Instructions:**
1. Preserve the original timestamp of each subtitle.
2. Keep the original meaning, punctuation, and formatting intact.
3. Convert **only the original subtitle text** to Roman letters, word by word.
4. Do not add, remove, or change any words.
5. Output in strict JSON format exactly like this:

```json
{{
  "subtitle sequence number": {{
    "timestamp": "original timestamp",
    "original subtitle text": "original {language} subtitle line",
    "dubbing": "Romanized, {language} line of original subtitle text"
  }}
}}
```

Focus entirely on **accurate Romanization**; do not modify anything else.
"""
    return prompt
|
|
|
|
|
|
|
import pysrt |
|
|
|
def prompt_maker(srt_path, target_language, task="Translation"):
    """Turn an .srt file into a ready-to-paste LLM prompt.

    Every subtitle is written as ``index``, ``start --> end`` and text,
    followed by the instruction text for the chosen task. The result is saved
    next to the input with a ``.txt`` extension and also returned.

    Args:
        srt_path: Path of the .srt file to read (opened as UTF-8 with pysrt).
        target_language: Language name passed to the prompt builder.
        task: "Translation" or "Romanization"; any other value selects the
            grammar-fix prompt.

    Returns:
        A ``(content, srt_path)`` tuple: the full prompt text and the
        unchanged input path.
    """
    # Swap only the final extension. A plain str.replace would also rewrite
    # ".srt" inside directory names and, when the name has no ".srt" at all,
    # would make the output path equal the input and overwrite the source.
    txt_path = os.path.splitext(srt_path)[0] + ".txt"
    subs = pysrt.open(srt_path, encoding="utf-8")

    # Exactly one instruction block is appended per task.
    if task == "Translation":
        instructions = prompt_translation(target_language)
    elif task == "Romanization":
        instructions = prompt_srt_to_romanized(target_language)
    else:
        instructions = prompt_fix_grammar(target_language)

    entries = [
        f"{sub.index}\n{sub.start} --> {sub.end}\n{sub.text}\n\n" for sub in subs
    ]
    content = "".join(entries) + instructions

    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(content)

    return content, srt_path
|
|
|
|
|
|
|
|
|
import pysrt |
|
import json |
|
import os |
|
def _extract_json_object(text):
    """Return the outermost ``{...}`` span of *text*, or *text* if there is none."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        return text
    return text[start:end + 1]


def json_to_srt(json_script, srt_path):
    """Convert dubbing-friendly JSON back into an .srt file.

    The output is written to ``./dubbing_srt/<name>_dubbing.srt`` where
    ``<name>`` is the base name of ``srt_path`` without its extension. Cues
    are numbered 1..n in the order they appear in the JSON; the JSON keys
    themselves are not used.

    Args:
        json_script: Either a mapping, or a string holding a JSON object.
            A string may carry text around the object (for example a
            Markdown ```json fence); only the outermost ``{...}`` is parsed.
        srt_path: Path of the original subtitle file; used only to name the
            output file.

    Returns:
        Path of the written .srt file.

    Raises:
        json.JSONDecodeError: If a string input does not contain valid JSON.
        KeyError: If an entry lacks "timestamp" or "dubbing".
    """
    os.makedirs("./dubbing_srt", exist_ok=True)

    name_no_ext = os.path.splitext(os.path.basename(srt_path))[0]
    output_srt_path = os.path.join("./dubbing_srt", f"{name_no_ext}_dubbing.srt")

    if isinstance(json_script, str):
        # The prompts ask the model to answer inside a ```json fence, so a
        # pasted reply often includes the fence lines around the object.
        json_object = json.loads(_extract_json_object(json_script))
    else:
        json_object = json_script

    with open(output_srt_path, "w", encoding="utf-8") as f:
        for i, value in enumerate(json_object.values(), start=1):
            f.write(f"{i}\n{value['timestamp']}\n{value['dubbing']}\n\n")

    return output_srt_path
|
|
|
|
|
|
|
|
|
def ui2():
    """Build the LLM-assisted subtitle translation tab.

    Step 1 (left column) turns an uploaded .srt into a prompt via
    ``prompt_maker``; step 2 (right column) converts the pasted JSON answer
    back into an .srt via ``json_to_srt``. The second value returned by
    ``prompt_maker`` is kept in a ``gr.State`` and handed to ``json_to_srt``.

    Returns:
        The assembled ``gr.Blocks`` object.
    """
    task_choices = [
        "Translation",
        "Romanization",
        "Fix Grammar [English to English for dubbing]",
    ]

    with gr.Blocks() as demo:
        gr.Markdown("<center><h1 style='font-size: 32px;'>π¬ Subtitle Translation Using LLM</h1></center>")

        # Carries the uploaded file's path from step 1 to step 2.
        srt_path_state = gr.State("")

        with gr.Row():
            with gr.Column():
                gr.Markdown("### Step 1: Generate Prompt")
                srt_upload = gr.File(
                    label="Upload .srt file generated by Whisper",
                    file_types=[".srt"],
                )
                task_picker = gr.Dropdown(
                    task_choices,
                    label="Select Task",
                    value="Translation",
                )
                lang_picker = gr.Dropdown(
                    target_lang_list,
                    label="Select the language you want to translate into",
                    value="English",
                )
                prompt_btn = gr.Button("Generate Prompt")
                prompt_box = gr.Textbox(
                    label="Copy & Paste this prompt in https://aistudio.google.com/",
                    lines=20,
                    show_copy_button=True,
                )

            with gr.Column():
                gr.Markdown("### Step 2: Paste JSON & Convert Back to SRT")
                json_box = gr.Textbox(
                    label="Paste JSON script from https://aistudio.google.com/ ",
                    lines=20,
                    placeholder="Paste the JSON output here...",
                )
                to_srt_btn = gr.Button("Convert JSON β SRT")
                srt_download = gr.File(label="Download new .srt")

        prompt_btn.click(
            fn=prompt_maker,
            inputs=[srt_upload, lang_picker, task_picker],
            outputs=[prompt_box, srt_path_state],
        )

        to_srt_btn.click(
            fn=json_to_srt,
            inputs=[json_box, srt_path_state],
            outputs=srt_download,
        )

    return demo
|
|
|
|
|
|
|
|
|
import click |
|
# Command-line entry point: builds both tabs, combines them into one tabbed
# interface and launches it. Documented with comments rather than a docstring
# so the text click prints for --help stays unchanged.
@click.command()
@click.option("--debug", is_flag=True, default=False, help="Enable debug mode.")
@click.option("--share", is_flag=True, default=False, help="Enable sharing of the interface.")
def main(share, debug):
    # Tab title -> tab content; the literal is evaluated in order, so ui1()
    # is built before ui2().
    tabs = {
        "Generate SRT File": ui1(),
        "SRT Translation": ui2(),
    }
    custom_css = """.gradio-container { font-family: 'SF Pro Display', -apple-system, BlinkMacSystemFont, sans-serif; }"""

    demo = gr.TabbedInterface(
        list(tabs.values()),
        list(tabs.keys()),
        title="",
        theme=gr.themes.Soft(),
        css=custom_css,
    )
    demo.queue().launch(share=share, debug=debug)


if __name__ == "__main__":
    main()