Spaces:

vinod-anbalagan
/

global-sound

Sleeping

File size: 8,282 Bytes

78927d8
2e012b2
 
126da6f
84046d9
1e13aa4
2e012b2
 
 
 
 
38ca409
2e012b2
18d88cd
 
 
 
2e012b2
 
38ca409
78927d8
a9fb107
18d88cd
84046d9
918379d
38ca409
918379d
 
a9fb107
84046d9
a9fb107
918379d
a9fb107
 
38ca409
918379d
ff2f090
 
78927d8
a9fb107
ff2f090
2e012b2
18d88cd
ff2f090
126da6f
38ca409
84046d9
126da6f
959d35c
126da6f
2e012b2
 
918379d
 
 
 
 
 
 
959d35c
2e012b2
 
 
18d88cd
 
959d35c
 
78927d8
959d35c
 
 
 
 
 
2e012b2
 
959d35c
78927d8
2e012b2
 
42728aa
918379d
27f1ea6
4d8f8dc
78927d8
2e012b2
 
38ca409
918379d
 
d0a2c8f
 
 
38ca409
 
 
d0a2c8f
78927d8
2e012b2
 
918379d
 
 
 
 
78927d8
918379d
84046d9
1c57e9d
918379d
84046d9
 
2e012b2
78927d8
 
 
 
 
 
 
 
a9fb107
18d88cd
78927d8
ff2f090
a9fb107
84046d9
 
 
a9fb107
918379d
126da6f
 
 
 
18d88cd
 
78927d8
 
959d35c
18d88cd
 
 
126da6f
2e012b2
ff2f090
78927d8
 
2e012b2
126da6f
 
918379d
 
 
 
 
 
 
 
 
 
2e012b2
42728aa
2e012b2

# app.py 
import gradio as gr
import os
import shutil

# Importing custom modules
from src.audio_processor import AudioProcessor
from src.transcriber import Transcriber
from src.translator import Translator
from src.subtitle_generator import SubtitleGenerator

# --- 1. INITIALIZE ALL MODELS ONCE ON STARTUP ---
print("Initializing all models, please wait...")
audio_processor = AudioProcessor()
transcriber = Transcriber(model_size="base")
translator = Translator()
subtitle_generator = SubtitleGenerator()
print("\nAll models initialized. The application is ready.")


# --- USAGE COUNTER (runs in the background) ---
COUNTER_FILE = "usage_count.txt"

def get_usage_count():
    if not os.path.exists(COUNTER_FILE): return 0
    with open(COUNTER_FILE, "r") as f:
        try: return int(f.read().strip())
        except (ValueError, TypeError): return 0

def increment_usage_count():
    count = get_usage_count() + 1
    with open(COUNTER_FILE, "w") as f: f.write(str(count))
    return count


# --- MAIN PROCESSING FUNCTION (WITH ROBUSTNESS CHECK) ---
def generate_subtitles_for_video(video_upload_path, apply_noise_reduction, target_language, preserve_technical_terms, quick_process, progress=gr.Progress()):
    if not video_upload_path:
        raise gr.Error("You must upload a video file to begin.")

    video_path = video_upload_path
    temp_files_to_clean = []

    try:
        duration_limit = 60 if quick_process else None
        if quick_process:
            print("Quick Process Mode enabled: processing first 60 seconds only.")

        progress(0.1, desc="Step 1/3: Extracting and cleaning audio...")
        processed_audio_path = audio_processor.extract_and_process_audio(video_path, apply_noise_reduction, duration_limit)
        temp_files_to_clean.append(processed_audio_path)

        # FIX: Check for silent/empty audio file before transcription
        if not os.path.exists(processed_audio_path) or os.path.getsize(processed_audio_path) == 0:
            raise gr.Error(
                "Failed to extract valid audio. The video might be silent or have no audio track. "
                "If 'Quick Process' is enabled, the first 60 seconds may be silent."
            )

        progress(0.3, desc="Step 2/3: Transcribing audio...")
        original_segments, src_lang = transcriber.transcribe_audio(processed_audio_path)

        original_srt_path = subtitle_generator.create_srt_file(f"subtitles_{src_lang}", original_segments)
        output_files = [original_srt_path]
        final_video_subtitle_path = original_srt_path
        
        if target_language and target_language != src_lang:
            progress(0.7, desc=f"Step 3/3: Translating to {target_language.upper()}...")
            translated_segments = translator.translate_segments(original_segments, src_lang, target_language, preserve_technical_terms)
            translated_srt_path = subtitle_generator.create_srt_file(f"subtitles_{target_language}", translated_segments)
            output_files.append(translated_srt_path)
            final_video_subtitle_path = translated_srt_path

        progress(1.0, desc="Processing complete!")
        summary = (
            f"Processing Complete!\n\n"
            f"Source Language Detected: {src_lang.upper()}\n"
            f"Translation Language: {target_language.upper() if target_language else 'N/A (Transcription only)'}"
        )
        preview_text = "Transcription Preview:\n" + "\n".join([seg['text'] for seg in original_segments[:5]])
        
        increment_usage_count()
        video_player_update = (video_path, final_video_subtitle_path)
        
        return video_player_update, output_files, summary, preview_text

    except Exception as e:
        print(f"An error occurred in the main pipeline: {e}")
        if isinstance(e, gr.Error): raise e
        else: raise gr.Error(f"An unexpected error occurred: {str(e)}")
    finally:
        for path in temp_files_to_clean:
            if os.path.isfile(path):
                try: os.remove(path)
                except OSError as e_os: print(f"Error removing file {path}: {e_os}")


# --- GRADIO UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="Global Sound 🌍", css="style.css") as demo:
    gr.Markdown("# Global Sound — AI-Powered Video Translator")
    gr.Markdown("Translate or transcribe your videos by generating downloadable subtitle files (`.srt`).")
    gr.Markdown("### How to Use\n"
                "1. **Upload Video**: Drag and drop or click to upload your video file.\n"
                "2. **Choose Options**: Select a language for translation, or leave as `No Translation` for transcription only.\n"
                "3. **Generate**: Click the **Generate Subtitles** button and wait for the process to complete.")

    # --- PERFORMANCE WARNING ---
    gr.Markdown(
        "<div style='text-align:center; padding: 10px; border-radius: 5px; background-color: #fff3cd; color: #664d03; border: 1px solid #ffecb5;'>"
        "**⚠️ Important Performance Note:** This app runs on a free CPU. For best results, **please use video clips under 15 minutes.** Longer videos will likely cause a connection timeout. Use the 'Quick Process' option for a fast test."
        "</div>"
    )

    language_choices = [
        ("Arabic", "ar"), ("Chinese", "zh"), ("English", "en"), ("French", "fr"), 
        ("German", "de"), ("Hindi", "hi"), ("Japanese", "ja"), ("Korean", "ko"), 
        ("Portuguese", "pt"), ("Russian", "ru"), ("Spanish", "es"), ("Tamil", "ta"), 
        ("Ukrainian", "uk"), ("Vietnamese", "vi")
    ]
    translation_options = [("No Translation", None)] + sorted(language_choices)

    with gr.Row(equal_height=True):
        with gr.Column(scale=2):
            gr.Markdown("### 1. Input & Options")
            video_upload_input = gr.Video(label="Upload Video", sources=['upload'])
            with gr.Accordion("Settings", open=True):
                quick_process_checkbox = gr.Checkbox(label="Quick Process (First 60s Only)", value=True, info="Ideal for testing or a fast preview.")
                noise_reduction = gr.Checkbox(label="Apply Noise Reduction", value=True, info="Recommended for videos with background noise.")
                preserve_technical = gr.Checkbox(label="Preserve Technical Terms", value=True, info="Protects words like 'GAN' or 'PyTorch' from translation.")
            
            language_dropdown = gr.Dropdown(label="Translate To", info="Select a language for translation.", choices=translation_options, value=None)
            
            with gr.Row():
                process_btn = gr.Button("Generate Subtitles", variant="primary", scale=3)
                stop_btn = gr.Button("Stop", variant="stop", scale=1)

        with gr.Column(scale=3):
            gr.Markdown("### 2. Results")
            video_output = gr.Video(label="Video with Subtitles", interactive=False)
            output_files = gr.File(label="Download Subtitle Files (.srt)", file_count="multiple", interactive=False)
            summary_output = gr.Textbox(label="Processing Summary", lines=4, interactive=False)
            preview_output = gr.Textbox(label="Transcription Preview", lines=4, interactive=False)

    process_event = process_btn.click(
        fn=generate_subtitles_for_video,
        inputs=[video_upload_input, noise_reduction, language_dropdown, preserve_technical, quick_process_checkbox],
        outputs=[video_output, output_files, summary_output, preview_output]
    )

    stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[process_event])

    gr.Markdown("---")
    
    # --- NEW: DEVELOPER & POWER USER NOTE ---
    gr.Markdown(
        "### For Developers & Power Users\n"
        "This online demo is limited by the free CPU hardware. For faster performance, no time limits, and to process longer videos, you can run this application on your own machine.\n\n"
        "**Feel free to fork the repository on GitHub and run it locally, especially if you have a computer with a GPU.**\n\n"
        "➡️ **[Find the code on GitHub](https://github.com/VinodAnbalagan/global-sound.git)**" 
    )

    gr.Markdown("---")
    gr.Markdown("Made from contributors using Gemini, OpenAI Whisper, mBART, and Gradio.")

if __name__ == "__main__":
    demo.launch(debug=True)