import gradio as gr
import os
import json
import logging
from moviepy.editor import VideoFileClip, AudioFileClip
import shutil
from src.subtitle_extractor import transcribe_audio, save_srt
import time
import subprocess
from datetime import datetime
import pandas as pd
import tempfile
import atexit

# --- INITIAL CONFIGURATION ---
# Per-run scratch directory; removed automatically at interpreter exit.
TEMP_DIR = tempfile.mkdtemp()
atexit.register(shutil.rmtree, TEMP_DIR, ignore_errors=True)

try:
    from faster_whisper import WhisperModel
except ImportError:
    WhisperModel = None
    logging.warning("Libreria 'faster_whisper' non trovata. La funzionalità sarà disabilitata.")

logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
logging.info(f"Directory temporanea creata: {TEMP_DIR}")

# Cooperative stop flag read by transcribe(); nothing in this file sets it to
# True (the stop button is not wired) — NOTE(review): confirm intended.
stop_requested = False


def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp 'HH:MM:SS,mmm'."""
    h = int(seconds // 3600)
    m = int((seconds % 3600) // 60)
    s = int(seconds % 60)
    ms = int((seconds - int(seconds)) * 1000)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"


# --- BACKEND FUNCTIONS ---
def extract_audio_only(video_path, progress=gr.Progress(track_tqdm=True)):
    """Extract the audio track of *video_path* to output/<name>_audio.mp3.

    Returns a 3-tuple: (audio player update, extracted file path, download
    file update). On failure all three are hidden/None.
    """
    if not video_path:
        gr.Warning("Carica prima un video per estrarre l'audio.")
        return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)
    try:
        gr.Info("Estrazione audio in corso...")
        video = VideoFileClip(video_path)
        output_dir = os.path.join(os.getcwd(), "output")
        os.makedirs(output_dir, exist_ok=True)
        base_name = os.path.splitext(os.path.basename(video_path))[0]
        audio_filename = os.path.join(output_dir, f"{base_name}_audio.mp3")
        video.audio.write_audiofile(audio_filename, logger=None)
        gr.Info("Estrazione audio completata.")
        return gr.update(value=audio_filename, visible=True), audio_filename, gr.update(visible=True, value=audio_filename)
    except Exception as e:
        gr.Error(f"Errore durante l'estrazione dell'audio: {e}")
        return gr.update(visible=False, value=None), None, gr.update(visible=False, value=None)


def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True)):
    """Burn *srt_path* into *video_path* with ffmpeg.

    Returns (output_video_path, srt_path) on success, (None, None) on failure.
    """
    if not video_path or not srt_path:
        gr.Warning("Percorso video o sottotitoli mancante!")
        return None, None
    if not os.path.exists(srt_path):
        gr.Error(f"File sottotitoli non trovato: {srt_path}")
        return None, None
    video_basename = os.path.splitext(os.path.basename(video_path))[0]
    srt_basename = os.path.splitext(os.path.basename(srt_path))[0]
    output_video_path = os.path.join(TEMP_DIR, f"{video_basename}_subbed_with_{srt_basename}.mp4")
    gr.Info("Inizio processo di unione video...")
    # FIX: escape the path for the ffmpeg filter graph — backslashes, drive
    # colons (Windows) and apostrophes in the raw path break `subtitles=`.
    escaped_srt = srt_path.replace("\\", "/").replace(":", "\\:").replace("'", "\\'")
    command = [
        "ffmpeg", "-y", "-i", video_path,
        "-vf", f"subtitles='{escaped_srt}'",
        "-c:a", "copy", "-c:v", "libx264", "-crf", "23", "-preset", "veryfast",
        output_video_path,
    ]
    try:
        subprocess.run(command, check=True, capture_output=True, text=True, encoding='utf-8')
        gr.Info("Video con sottotitoli generato con successo!")
        return output_video_path, srt_path
    except Exception as e:
        gr.Error(f"Errore ffmpeg: {e}")
        return None, None


def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, current_history):
    """Transcribe the given audio (or the audio of the given video) to SRT/TXT.

    Always returns a 5-tuple consumed positionally by transcribe_with_loader:
    (history list, submit-button update, history dataframe, audio editor
    update or None, original-audio path or None).
    """
    logging.debug("Starting transcription process...")
    start_time = time.time()
    global stop_requested
    if stop_requested:
        logging.warning("Transcription stopped by user.")
        return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None

    # --- API KEY VALIDATION ---
    if library == "OpenAI Whisper" and (not api_key or not api_key.strip()):
        gr.Error("Devi inserire la API Key OpenAI per usare questa modalità.")
        return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None

    audio_source = None
    audio_editor_update = None
    original_audio_update = None
    if edited_audio_path and os.path.exists(edited_audio_path):
        # Prefer the user-edited audio track when one exists.
        logging.info("Using edited audio for transcription.")
        audio_source = edited_audio_path
        audio_editor_update = gr.update(value=edited_audio_path, visible=True)
        original_audio_update = edited_audio_path
    elif video_path and os.path.exists(video_path):
        logging.info("Extracting audio from original video for transcription...")
        try:
            video = VideoFileClip(video_path)
            temp_audio_path = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
            video.audio.write_audiofile(temp_audio_path, logger=None)
            output_dir = os.path.join(os.getcwd(), "output")
            os.makedirs(output_dir, exist_ok=True)
            base_name = os.path.splitext(os.path.basename(video_path))[0]
            audio_filename = os.path.join(output_dir, f"{base_name}_audio_gradio.wav")
            shutil.copy(temp_audio_path, audio_filename)
            # FIX: remove the scratch WAV right after copying it (the old
            # cleanup branch below never fired, since audio_source always
            # pointed at the copy in output/, not at TEMP_DIR).
            os.remove(temp_audio_path)
            logging.info("Temporary audio file removed.")
            rel_audio_path = os.path.relpath(audio_filename, os.getcwd())
            audio_source = audio_filename
            audio_editor_update = gr.update(value=rel_audio_path, visible=True)
            original_audio_update = rel_audio_path
        except Exception as e:
            logging.error(f"Error extracting audio: {e}")
            # FIX: was a 6-tuple here; every caller expects 5 values.
            return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None
    else:
        logging.error("No valid video or audio source provided.")
        # FIX: was a 6-tuple here; every caller expects 5 values.
        return current_history, gr.update(interactive=True), update_dataframe(current_history), gr.update(visible=False, value=None), None

    try:
        if library == "OpenAI Whisper":
            logging.info("Using OpenAI Whisper for transcription.")
            try:
                srt_content, plain_text = transcribe_audio(
                    audio_source,
                    library="OpenAI Whisper",
                    api_key=api_key,
                    words_per_sub=int(words_per_sub),
                )
            except Exception as e:
                logging.error(f"Errore chiamata OpenAI Whisper: {e}")
                gr.Error(f"Errore OpenAI Whisper: {e}")
                return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None
        else:
            logging.info("Using Faster Whisper for transcription.")
            srt_content, plain_text = transcribe_audio(
                audio_source,
                library="faster_whisper",
                api_key=None,
                words_per_sub=int(words_per_sub),
            )
        logging.debug("Transcription completed successfully.")
    except Exception as e:
        logging.error(f"Error during transcription: {e}")
        gr.Error(f"Errore trascrizione: {e}")
        return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None

    base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
    engine_suffix = "_openai" if library == "OpenAI Whisper" else "_fasterwhisper"
    srt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.srt")
    txt_filename = os.path.join(TEMP_DIR, f"{base_name}{engine_suffix}.txt")
    try:
        save_srt(srt_content, srt_filename)
        from src.subtitle_extractor import save_txt
        save_txt(plain_text, txt_filename)
        logging.info(f"SRT file saved at: {srt_filename}, TXT file saved at: {txt_filename}")
    except Exception as e:
        logging.error(f"Error saving SRT/TXT file: {e}")
        return current_history, gr.update(interactive=True), update_dataframe(current_history), None, None

    elapsed_time = time.time() - start_time
    new_entry_srt = {
        "File SRT": os.path.basename(srt_filename),
        "Libreria": library,
        "Tipologia SRT": "SRT con tempi",
        "Percorso Completo": srt_filename,
        "Video Unito": None,
        "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
        "Orario Unione": "",
    }
    new_entry_txt = {
        "File SRT": os.path.basename(txt_filename),
        "Libreria": library,
        "Tipologia SRT": "Testo puro",
        "Percorso Completo": txt_filename,
        "Video Unito": None,
        "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
        "Orario Unione": "",
    }
    updated_history = current_history.copy()
    updated_history.append(new_entry_srt)
    updated_history.append(new_entry_txt)
    logging.debug(f"Updated history: {updated_history}")
    # Always re-enable the button after generation.
    return updated_history, gr.update(interactive=True), update_dataframe(updated_history), audio_editor_update, original_audio_update


# ... (all the other helper functions such as save_srt_changes, etc.
def save_srt_changes(srt_path, new_content):
    """Overwrite the SRT file at *srt_path* with *new_content* (UTF-8)."""
    if not srt_path:
        gr.Error("Percorso file non valido.")
        return
    try:
        with open(srt_path, 'w', encoding='utf-8') as f:
            f.write(new_content)
        gr.Info(f"File {os.path.basename(srt_path)} salvato!")
    except Exception as e:
        gr.Error(f"Errore salvataggio: {e}")


def show_srt_for_editing(srt_path):
    """Return a Textbox update showing the content of *srt_path*, or a hidden
    update if the path is missing/unreadable."""
    logging.info(f"show_srt_for_editing triggered with srt_path: {srt_path}")
    if not srt_path or not os.path.exists(srt_path):
        logging.warning("Percorso SRT non valido o file inesistente.")
        return gr.update(value=None, visible=False)
    try:
        # Read the SRT file content.
        with open(srt_path, 'r', encoding='utf-8') as f:
            content = f.read()
        logging.info("Contenuto del file SRT caricato con successo.")
        # Make the edit box visible with the file content.
        return gr.update(value=content, visible=True)
    except Exception as e:
        logging.error(f"Errore durante la lettura del file SRT: {e}")
        return gr.update(value=None, visible=False)


# Minified JS: start/stop an on-page elapsed-seconds loader timer.
js_loader_script = "function startLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='block',window.loaderInterval&&clearInterval(window.loaderInterval),document.getElementById('timer').innerText='0s',window.loaderInterval=setInterval(()=>{document.getElementById('timer').innerText=parseInt(document.getElementById('timer').innerText)+1+'s'},1e3))}function stopLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='none',window.loaderInterval&&clearInterval(window.loaderInterval))}"

# App version badge, read from manifest.json when available.
try:
    with open("manifest.json", "r", encoding="utf-8") as mf:
        manifest = json.load(mf)
    VERSION = manifest.get("version", "1.0.0")
except FileNotFoundError:
    VERSION = "1.0.0"
BADGE = f"v{VERSION}"

# Loader HTML templates.
# NOTE(review): the surrounding HTML tags appear to have been stripped from
# this file at some point — only the text content survives. Confirm against
# version control before shipping.
LOADER_HTML_ON = """
Generazione sottotitoli in corso...
"""
LOADER_HTML_OFF = """ """

with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"") as demo:
    srt_history_state = gr.State([])
    selected_srt_path_state = gr.State(None)
    original_audio_path_state = gr.State()
    # NOTE(review): header markup looks stripped here as well.
    gr.Markdown(f"""

Transcribe Speech {BADGE}

""")
    gr.Markdown("### 1. Carica un file")
    video_input = gr.File(label="Carica un file video o audio", file_types=["video", "audio"])
    with gr.Row(visible=False) as main_panel:
        with gr.Column(scale=1):
            gr.Markdown("### 2. Azioni Principali")
            extract_audio_btn = gr.Button("🎵 Estrai e Modifica Audio")
            gr.Markdown("---")
            library_selector = gr.Radio(choices=["Faster Whisper", "OpenAI Whisper"], label="Libreria per Sottotitoli", value="Faster Whisper")
            with gr.Group(visible=False) as openai_options:
                api_key_input = gr.Textbox(label="API Key OpenAI", type="password", placeholder="sk-...")
                cost_estimate = gr.Markdown()
            words_slider = gr.Slider(minimum=6, maximum=15, value=7, step=1, label="Parole per sottotitolo")
            # --- LOADER HTML, ALWAYS PRESENT ABOVE THE BUTTON ---
            loader_html = gr.HTML(LOADER_HTML_OFF)
            submit_btn = gr.Button("▶️ Genera Sottotitoli", variant="primary")
            stop_btn = gr.Button("⏹️ Arresta", variant="stop", visible=False)
            loader = gr.HTML("""""")
        with gr.Column(scale=2):
            gr.Markdown("### 3. Anteprima ed Editor")
            video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
            audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath", visible=False)
            download_audio_btn = gr.Button("⬇️ Download Audio", variant="primary")
            audio_download_file = gr.File(label="Scarica Audio", visible=False)
            undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
            final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False, visible=False)
            final_video_loader = gr.HTML(""" """)
        with gr.Column():
            gr.Markdown("--- \n### 4. Cronologia e Azioni sui Sottotitoli\n*Seleziona una riga per attivare le azioni.*")
            history_df = gr.Dataframe(headers=["File SRT", "Libreria", "Tipologia SRT", "Orario Generazione", "Video Unito", "Orario Unione"], interactive=True)
            with gr.Row(visible=False) as action_buttons:
                edit_btn = gr.Button("📝 Modifica SRT")
                merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
                delete_btn = gr.Button("🗑️ Elimina", variant="stop")
                download_btn = gr.Button("⬇️ Download SRT", variant="primary")
            srt_download_file = gr.File(label="Scarica SRT", visible=False)
            with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
                srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
                save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")

    # --- HELPER FUNCTIONS AND EVENT LOGIC ---

    def show_main_controls(file_obj):
        """React to a file upload: preview videos, copy audio files into
        output/ and load them into the audio editor.

        Returns 4 updates: (video preview, main panel, submit button,
        audio editor).
        """
        import mimetypes
        if file_obj:
            file_path = file_obj.name
            mime, _ = mimetypes.guess_type(file_path)
            is_video = mime and mime.startswith("video")
            is_audio = mime and mime.startswith("audio")
            video_preview_update = gr.update(visible=is_video, value=file_path if is_video else None)
            submit_btn_update = gr.update(interactive=True)
            main_panel_update = gr.update(visible=True)
            if is_audio:
                output_dir = os.path.join(os.getcwd(), "output")
                os.makedirs(output_dir, exist_ok=True)
                ext = os.path.splitext(file_path)[1].lower()
                if ext not in [".wav", ".mp3", ".flac", ".ogg"]:
                    gr.Error("Formato audio non supportato. Usa WAV, MP3, FLAC o OGG.")
                    return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)
                # Copy the upload into output/ with a timestamped name so
                # repeated uploads never collide.
                base_name = os.path.splitext(os.path.basename(file_path))[0]
                timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
                new_audio_name = f"{base_name}_{timestamp}{ext}"
                new_audio_path = os.path.join(output_dir, new_audio_name)
                shutil.copy(file_path, new_audio_path)
                rel_audio_path = os.path.relpath(new_audio_path, os.getcwd())
                video_preview_update = gr.update(visible=False, value=None)
                audio_output_update = gr.update(value=rel_audio_path, visible=True)
                return video_preview_update, main_panel_update, submit_btn_update, audio_output_update
            return video_preview_update, main_panel_update, submit_btn_update, gr.update(visible=False, value=None)
        return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False), gr.update(visible=False, value=None)

    def on_select_srt(history_data, evt: gr.SelectData):
        """Handle a row selection in the history table.

        Returns 3 values matching the wired outputs: (selected SRT path,
        action-buttons visibility, editor-accordion visibility).
        """
        # FIX: the failure paths used to return 4 values for 3 outputs.
        if evt.index is None:
            return None, gr.update(visible=False), gr.update(visible=False)
        selected_entry = history_data[evt.index[0]]
        srt_path = selected_entry["Percorso Completo"]
        if not os.path.exists(srt_path):
            gr.Warning("Il file SRT selezionato non esiste.")
            return None, gr.update(visible=False), gr.update(visible=False)
        # Return the selected path and show the action buttons; keep the
        # editor accordion hidden until the user clicks "Modifica".
        return (
            srt_path,
            gr.update(visible=True),
            gr.update(visible=False),
        )

    def update_dataframe(history_list):
        """Project the history entries into the columns shown in the table."""
        if not history_list:
            logging.debug("History list is empty. Returning empty dataframe.")
            return pd.DataFrame(columns=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"])
        display_list = []
        for entry in history_list:
            display_entry = entry.copy()
            # Show a checkmark instead of the merged-video path.
            display_entry["Video Unito"] = "✔️" if entry.get("Video Unito") else ""
            display_list.append(display_entry)
        logging.debug(f"Updated dataframe with entries: {display_list}")
        return pd.DataFrame(display_list)[
            ["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"]
        ]

    def delete_selected(history_data, srt_path_to_delete):
        """Delete the selected SRT (and its merged video, if any) from disk
        and from the history. Returns (updated history, action-row update)."""
        if not srt_path_to_delete:
            gr.Warning("Nessun file selezionato.")
            return history_data, gr.update(visible=False)
        entry_to_delete = next((e for e in history_data if e["Percorso Completo"] == srt_path_to_delete), None)
        if not entry_to_delete:
            gr.Error("Record non trovato.")
            return history_data, gr.update(visible=False)
        if os.path.exists(entry_to_delete["Percorso Completo"]):
            os.remove(entry_to_delete["Percorso Completo"])
        if entry_to_delete.get("Video Unito") and os.path.exists(entry_to_delete["Video Unito"]):
            os.remove(entry_to_delete["Video Unito"])
        updated_history = [e for e in history_data if e["Percorso Completo"] != srt_path_to_delete]
        gr.Info(f"Record '{entry_to_delete['File SRT']}' eliminato.")
        return updated_history, gr.update(visible=False)

    def handle_merge_success(output_video_path, srt_merged_path, current_history):
        """Record a successful merge on the matching history entry."""
        if not output_video_path:
            return current_history, None
        for entry in current_history:
            if entry["Percorso Completo"] == srt_merged_path:
                entry["Video Unito"] = output_video_path
                entry["Orario Unione"] = datetime.now().strftime("%H:%M:%S")
                break
        return current_history, output_video_path

    # --- EVENT WIRING ---
    video_input.upload(
        fn=show_main_controls,
        inputs=video_input,
        outputs=[video_preview, main_panel, submit_btn, audio_output],
    )
    # FIX: extract_audio_only returns 3 values; the third (download-file
    # update) was previously dropped, causing an output-arity mismatch.
    extract_audio_btn.click(
        fn=extract_audio_only,
        inputs=[video_input],
        outputs=[audio_output, original_audio_path_state, audio_download_file],
    )
    undo_audio_btn.click(
        fn=lambda path: path,
        inputs=[original_audio_path_state],
        outputs=[audio_output],
    )
    library_selector.change(
        lambda lib: gr.update(visible=lib == "OpenAI Whisper"),
        inputs=library_selector,
        outputs=openai_options,
    )

    # Loader HTML under the button.
    # NOTE(review): this rebinds `loader_html` to a second component; the
    # submit wiring below therefore updates this one, not the one above the
    # button. Confirm the duplicate is intentional.
    loader_html = gr.HTML(LOADER_HTML_OFF)

    def transcribe_with_loader(*args):
        """Generator wrapper around transcribe(): shows the loader and
        disables the submit button while transcription runs, then restores
        the UI with the results."""
        from gradio import update
        # Show the loader (display:block) and disable the button.
        yield None, update(interactive=False), None, gr.update(value=LOADER_HTML_ON), update(visible=True), None, gr.update(value=LOADER_HTML_ON)
        # Run the real transcription.
        result = transcribe(*args)
        audio_update = result[3] if result[3] is not None else update(visible=False, value=None)
        # Hide the loader when the process finishes (display:none).
        yield result[0], update(interactive=True), result[2], gr.update(value=LOADER_HTML_OFF), audio_update, result[4], gr.update(value=LOADER_HTML_OFF)

    # Use the wrapper and the extended output list for the submit button.
    submit_btn.click(
        fn=transcribe_with_loader,
        inputs=[
            video_input,
            audio_output,
            library_selector,
            api_key_input,
            words_slider,
            srt_history_state,
        ],
        outputs=[srt_history_state, submit_btn, history_df, loader_html, audio_output, original_audio_path_state, loader_html],
        queue=True,
        show_progress=False,
    )

    # History table selection.
    history_df.select(
        fn=on_select_srt,
        inputs=[srt_history_state],
        outputs=[
            selected_srt_path_state,  # selected SRT path
            action_buttons,           # show the action buttons
            srt_editor_accordion,     # initially hide the editor
        ]
    )
    # Load the selected SRT into the editor box.
    edit_btn.click(
        fn=show_srt_for_editing,
        inputs=[selected_srt_path_state],
        outputs=[srt_editor_box]
    )
    # Open the editor accordion when the edit button is clicked.
    edit_btn.click(
        fn=lambda: gr.update(visible=True),
        inputs=[],
        outputs=[srt_editor_accordion]
    )
FIX: Salva modifiche SRT --- save_edit_btn.click( fn=lambda srt_path, new_content: (save_srt_changes(srt_path, new_content), gr.update(interactive=False)), inputs=[selected_srt_path_state, srt_editor_box], outputs=[save_edit_btn], ) # --- FIX: Abilita/disabilita il pulsante Salva solo se ci sono modifiche --- def enable_save_btn(srt_path, new_content): if not srt_path or not os.path.exists(srt_path): return gr.update(interactive=False) try: with open(srt_path, 'r', encoding='utf-8') as f: original = f.read() if original != new_content: return gr.update(interactive=True) else: return gr.update(interactive=False) except Exception: return gr.update(interactive=False) srt_editor_box.change( fn=enable_save_btn, inputs=[selected_srt_path_state, srt_editor_box], outputs=[save_edit_btn], ) # Disabilita il pulsante Salva quando si seleziona un nuovo file edit_btn.click( fn=lambda: gr.update(interactive=False), inputs=[], outputs=[save_edit_btn], ) # Aggiorna il cablaggio eventi per merge_btn merge_btn.click( fn=lambda video_path, srt_path: ( gr.update(visible=True), # Mostra loader gr.update(visible=False), # Nascondi il player video ), inputs=[video_input, selected_srt_path_state], outputs=[final_video, final_video_loader], queue=True, show_progress=False, ) merge_btn.click( fn=lambda video_path, srt_path: ( gr.update(visible=True, value=merge_subtitles(video_path, srt_path)[0]), # Mostra video gr.update(visible=False), # Nascondi loader ), inputs=[video_input, selected_srt_path_state], outputs=[final_video, final_video_loader], queue=True, show_progress=False, ) # Rende visibile il video finale quando viene cliccato il pulsante merge_btn.click( fn=lambda: gr.update(visible=True), inputs=[], outputs=[final_video] # Rende visibile il componente del video finale ) # Riabilita il pulsante 'Genera Sottotitoli' dopo l'unione merge_btn.click( fn=lambda: gr.update(interactive=True), inputs=[], outputs=[submit_btn] ) # Aggiorna il cablaggio eventi per delete_btn delete_btn.click( 
fn=delete_selected, inputs=[srt_history_state, selected_srt_path_state], outputs=[srt_history_state, action_buttons] # Update history and hide action buttons ) # Download SRT: mostra il file selezionato come download download_btn.click( fn=lambda srt_path: gr.update(value=srt_path, visible=True) if srt_path and os.path.exists(srt_path) else gr.update(visible=False), inputs=[selected_srt_path_state], outputs=[srt_download_file], ) # Download Audio: mostra il file audio corrente come download download_audio_btn.click( fn=lambda audio_path: gr.update(value=audio_path, visible=True) if audio_path and os.path.exists(audio_path) else gr.update(visible=False), inputs=[audio_output], outputs=[audio_download_file], ) # --- PULIZIA FILE DI OUTPUT ALL'AVVIO --- def clean_output_dirs(): for folder in ["output", os.path.join("output", "subtitles")]: if os.path.exists(folder): for filename in os.listdir(folder): file_path = os.path.join(folder, filename) try: if os.path.isfile(file_path) or os.path.islink(file_path): os.unlink(file_path) elif os.path.isdir(file_path): shutil.rmtree(file_path) except Exception as e: print(f"Errore durante la cancellazione di {file_path}: {e}") clean_output_dirs() if __name__ == "__main__": demo.queue().launch() # Rimosso `share=True` per eseguire l'app localmente