danzapp70 commited on
Commit
0d6f640
·
verified ·
1 Parent(s): d75acb1

Deploy version v1.1.0

Browse files
Documentation.md ADDED
File without changes
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: MyTools
3
- emoji: 🌍
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
@@ -9,4 +9,7 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
  title: MyTools
3
+ emoji: "🌍"
4
  colorFrom: blue
5
  colorTo: indigo
6
  sdk: gradio
 
9
  pinned: false
10
  ---
11
 
12
+ This repository contains a simple tool to extract subtitles from a video file.
13
+ It provides a minimal CLI in `src/main.py` and a Gradio interface defined in `app.py`.
14
+
15
+ Refer to the [Hugging Face Spaces documentation](https://huggingface.co/docs/hub/spaces-config-reference) for configuration options.
app.py CHANGED
@@ -3,7 +3,8 @@ import os
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
6
- import openai
 
7
  import time
8
  import shutil
9
  import subprocess
@@ -22,7 +23,7 @@ except ImportError:
22
  WhisperModel = None
23
  logging.warning("Libreria 'faster_whisper' non trovata. La funzionalità sarà disabilitata.")
24
 
25
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
26
  logging.info(f"Directory temporanea creata: {TEMP_DIR}")
27
 
28
  stop_requested = False
@@ -78,44 +79,87 @@ def merge_subtitles(video_path, srt_path, progress=gr.Progress(track_tqdm=True))
78
  gr.Error(f"Errore ffmpeg: {e}"); return None, None
79
 
80
  def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, current_history):
81
- start_time = time.time(); global stop_requested
82
- if stop_requested: return current_history, gr.update(interactive=True), None
83
-
84
- audio_source_for_transcription = ""
 
 
 
 
85
  if edited_audio_path and os.path.exists(edited_audio_path):
86
- gr.Info("Uso l'audio modificato per la trascrizione.")
87
- audio_source_for_transcription = edited_audio_path
88
  elif video_path and os.path.exists(video_path):
89
- gr.Info("Estraggo l'audio dal video originale per la trascrizione...")
90
  try:
91
  video = VideoFileClip(video_path)
92
- audio_source_for_transcription = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
93
- video.audio.write_audiofile(audio_source_for_transcription, logger=None)
 
94
  except Exception as e:
95
- gr.Error(f"Errore estrazione audio: {e}"); return current_history, gr.update(interactive=True), None
 
96
  else:
97
- gr.Error("Nessuna sorgente video o audio valida."); return current_history, gr.update(interactive=True), None
98
-
99
- # Logica di trascrizione effettiva
100
- # (Ometto il corpo delle funzioni transcribe_video e transcribe_with_openai_whisper per brevità,
101
- # ma la logica sottostante è la stessa delle versioni precedenti)
102
-
103
- # Simuliamo il risultato per mantenere la struttura
104
- srt_filename = os.path.join(TEMP_DIR, "placeholder.srt")
105
- with open(srt_filename, "w") as f: f.write("1\n00:00:01,000 --> 00:00:02,000\nTest\n\n")
106
- library_used = library
107
- cost = "$0.00"
108
- success_msg = "Trascrizione completata"
109
-
110
- if os.path.exists(audio_source_for_transcription) and "temp_transcribe_audio" in audio_source_for_transcription:
111
- os.remove(audio_source_for_transcription)
112
-
113
- gr.Info("Trascrizione completata.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  elapsed_time = time.time() - start_time
115
- new_entry = {"File SRT": os.path.basename(srt_filename), "Libreria": library_used, "Tempo Impiegato (s)": f"{elapsed_time:.2f}", "Costo": cost, "Orario Generazione": datetime.now().strftime("%H:%M:%S"), "Orario Unione": "", "Percorso Completo": srt_filename, "Video Unito": None}
116
- updated_history = [entry for entry in current_history if entry["File SRT"] != os.path.basename(srt_filename)]
 
 
 
 
 
 
 
 
 
117
  updated_history.append(new_entry)
118
- return updated_history, gr.update(interactive=False), success_msg
 
 
119
 
120
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
121
 
@@ -127,11 +171,21 @@ def save_srt_changes(srt_path, new_content):
127
  except Exception as e: gr.Error(f"Errore salvataggio: {e}")
128
 
129
  def show_srt_for_editing(srt_path):
 
130
  if not srt_path or not os.path.exists(srt_path):
131
- gr.Warning("Nessun SRT selezionato."); return None, gr.update(visible=False)
132
- with open(srt_path, 'r', encoding='utf-8') as f: content = f.read()
133
- return content, gr.update(visible=True, open=True)
134
 
 
 
 
 
 
 
 
 
 
 
135
 
136
  js_loader_script = "function startLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='block',window.loaderInterval&&clearInterval(window.loaderInterval),document.getElementById('timer').innerText='0s',window.loaderInterval=setInterval(()=>{document.getElementById('timer').innerText=parseInt(document.getElementById('timer').innerText)+1+'s'},1e3))}function stopLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='none',window.loaderInterval&&clearInterval(window.loaderInterval))}"
137
 
@@ -169,17 +223,7 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
169
  gr.Markdown("### 3. Anteprima ed Editor")
170
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
171
  with gr.Group(visible=False) as audio_editor_group:
172
- audio_output = gr.Audio(
173
- label="Editor Traccia Audio",
174
- type="filepath",
175
- editable=True, # abilita il trim
176
- interactive=True, # mostra la waveform e gli handle
177
- waveform_options={ # (opzionale) personalizza l’aspetto
178
- "show_controls": True,
179
- "skip_length": 1, # tasti +1s / –1s
180
- "trim_region_color": "#1976d2" # colore della selezione
181
- }
182
- )
183
  undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
184
  final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
185
 
@@ -191,7 +235,7 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
191
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
192
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
193
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
194
- srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True)
195
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
196
 
197
  # --- FUNZIONI HELPER E LOGICA EVENTI ---
@@ -205,16 +249,39 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
205
  return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
206
 
207
  def on_select_srt(history_data, evt: gr.SelectData):
208
- if evt.index is None: return None, gr.update(visible=False), gr.update(visible=False), None
 
 
209
  selected_entry = history_data[evt.index[0]]
210
- return selected_entry["Percorso Completo"], gr.update(visible=True), gr.update(visible=False), selected_entry.get("Video Unito")
211
-
 
 
 
 
 
 
 
 
 
 
 
 
212
  def update_dataframe(history_list):
213
- if not history_list: return pd.DataFrame(columns=["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"])
 
 
 
214
  display_list = []
215
  for entry in history_list:
216
- display_entry = entry.copy(); display_entry["Video Unito"] = "✔️" if entry.get("Video Unito") else ""; display_list.append(display_entry)
217
- return pd.DataFrame(display_list)[["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"]]
 
 
 
 
 
 
218
 
219
  def delete_selected(history_data, srt_path_to_delete):
220
  if not srt_path_to_delete: gr.Warning("Nessun file selezionato."); return history_data, gr.update(visible=False)
@@ -235,15 +302,84 @@ with gr.Blocks(title="Audio/Subtitle Tool", theme=gr.themes.Soft(), head=f"<scri
235
  # --- CABLAGGIO EVENTI ---
236
 
237
  video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
238
-
239
- extract_audio_btn.click(fn=extract_audio_only, inputs=[video_input], outputs=[audio_output, original_audio_path_state, audio_editor_group])
240
-
241
- undo_audio_btn.click(fn=lambda path: path, inputs=[original_audio_path_state], outputs=[audio_output])
242
 
243
- # (Lascio qui il resto del cablaggio eventi per completezza)
244
- # ...
245
- # submit_event = submit_btn.click(...)
246
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
  if __name__ == "__main__":
249
- demo.queue().launch(share=True)
 
3
  import json
4
  import logging
5
  from moviepy.editor import VideoFileClip, AudioFileClip
6
+
7
+ from src.subtitle_extractor import transcribe_audio, save_srt
8
  import time
9
  import shutil
10
  import subprocess
 
23
  WhisperModel = None
24
  logging.warning("Libreria 'faster_whisper' non trovata. La funzionalità sarà disabilitata.")
25
 
26
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
27
  logging.info(f"Directory temporanea creata: {TEMP_DIR}")
28
 
29
  stop_requested = False
 
79
  gr.Error(f"Errore ffmpeg: {e}"); return None, None
80
 
81
def transcribe(video_path, edited_audio_path, library, api_key, words_per_sub, current_history):
    """Run the full transcription pipeline for the Gradio UI.

    Picks the audio source (the user-edited track if present, otherwise
    audio extracted from the uploaded video), transcribes it with the
    selected backend, saves the resulting SRT in TEMP_DIR and records the
    run in the history list.

    Args:
        video_path: path of the uploaded video (may be None).
        edited_audio_path: path of the user-edited audio track (may be None).
        library: backend label from the UI ("OpenAI Whisper", otherwise
            faster-whisper is used).
        api_key: OpenAI key, required only for the OpenAI backend.
        words_per_sub: max words per subtitle cue (coerced with int()).
        current_history: list of history-entry dicts held in gr.State.

    Returns:
        (updated_history, submit-button update, history DataFrame). On any
        failure the unchanged history is returned and the button re-enabled.
    """
    logging.debug("Starting transcription process...")
    start_time = time.time()
    global stop_requested
    if stop_requested:
        logging.warning("Transcription stopped by user.")
        return current_history, gr.update(interactive=True), update_dataframe(current_history)

    # Prefer the user-edited audio; fall back to extracting from the video.
    audio_source = None
    if edited_audio_path and os.path.exists(edited_audio_path):
        logging.info("Using edited audio for transcription.")
        audio_source = edited_audio_path
    elif video_path and os.path.exists(video_path):
        logging.info("Extracting audio from original video for transcription...")
        try:
            video = VideoFileClip(video_path)
            audio_source = os.path.join(TEMP_DIR, "temp_transcribe_audio.wav")
            video.audio.write_audiofile(audio_source, logger=None)
            logging.info(f"Audio extracted to: {audio_source}")
        except Exception as e:
            logging.error(f"Error extracting audio: {e}")
            return current_history, gr.update(interactive=True), update_dataframe(current_history)
    else:
        logging.error("No valid video or audio source provided.")
        return current_history, gr.update(interactive=True), update_dataframe(current_history)

    try:
        if library == "OpenAI Whisper":
            if not api_key:
                logging.error("Missing OpenAI API Key.")
                gr.Error("API Key OpenAI mancante.")
                return current_history, gr.update(interactive=True), update_dataframe(current_history)
            logging.info("Using OpenAI Whisper for transcription.")
            srt_content = transcribe_audio(
                audio_source,
                library="OpenAI Whisper",
                api_key=api_key,
                words_per_sub=int(words_per_sub),
            )
        else:
            logging.info("Using Faster Whisper for transcription.")
            srt_content = transcribe_audio(
                audio_source,
                library="faster_whisper",
                api_key=None,
                words_per_sub=int(words_per_sub),
            )
        logging.debug("Transcription completed successfully.")
    except Exception as e:
        logging.error(f"Error during transcription: {e}")
        gr.Error(f"Errore trascrizione: {e}")
        return current_history, gr.update(interactive=True), update_dataframe(current_history)

    # Name the SRT after the source file so repeated runs overwrite it.
    base_name = os.path.splitext(os.path.basename(video_path or audio_source))[0]
    srt_filename = os.path.join(TEMP_DIR, f"{base_name}.srt")
    try:
        save_srt(srt_content, srt_filename)
        logging.info(f"SRT file saved successfully at: {srt_filename}")
    except Exception as e:
        logging.error(f"Error saving SRT file: {e}")
        return current_history, gr.update(interactive=True), update_dataframe(current_history)

    # Only delete the audio file this function created itself, never the
    # user's edited track.
    if audio_source.startswith(TEMP_DIR) and os.path.basename(audio_source) == "temp_transcribe_audio.wav":
        os.remove(audio_source)
        logging.info("Temporary audio file removed.")

    elapsed_time = time.time() - start_time
    new_entry = {
        "File SRT": os.path.basename(srt_filename),
        "Libreria": library,
        "Tempo Impiegato (s)": f"{elapsed_time:.2f}",
        "Percorso Completo": srt_filename,
        "Video Unito": None,
        "Orario Generazione": datetime.now().strftime("%H:%M:%S"),
        "Orario Unione": "",
    }
    logging.debug(f"Adding new entry to history: {new_entry}")
    # Replace any previous entry for the same SRT file instead of duplicating.
    updated_history = [e for e in current_history if e["File SRT"] != os.path.basename(srt_filename)]
    updated_history.append(new_entry)
    logging.debug(f"Updated history: {updated_history}")

    # NOTE(review): the submit button is left disabled on success — confirm
    # this is intentional (a new upload re-enables it elsewhere).
    return updated_history, gr.update(interactive=False), update_dataframe(updated_history)
163
 
164
  # ... (tutte le altre funzioni helper come save_srt_changes, etc. rimangono qui)
165
 
 
171
  except Exception as e: gr.Error(f"Errore salvataggio: {e}")
172
 
173
def show_srt_for_editing(srt_path):
    """Load the selected SRT file into the editor textbox.

    Returns a single gr.update for the editor box: the file content with
    the box made visible on success, or a cleared/hidden box when the path
    is missing or the file cannot be read.
    """
    logging.info(f"show_srt_for_editing triggered with srt_path: {srt_path}")
    if not srt_path or not os.path.exists(srt_path):
        logging.warning("Percorso SRT non valido o file inesistente.")
        return gr.update(value=None, visible=False)

    try:
        # Read the SRT file content.
        with open(srt_path, 'r', encoding='utf-8') as f:
            content = f.read()
        logging.info("Contenuto del file SRT caricato con successo.")
        # Show the edit box populated with the file content.
        return gr.update(value=content, visible=True)
    except Exception as e:
        logging.error(f"Errore durante la lettura del file SRT: {e}")
        return gr.update(value=None, visible=False)
189
 
190
  js_loader_script = "function startLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='block',window.loaderInterval&&clearInterval(window.loaderInterval),document.getElementById('timer').innerText='0s',window.loaderInterval=setInterval(()=>{document.getElementById('timer').innerText=parseInt(document.getElementById('timer').innerText)+1+'s'},1e3))}function stopLoader(){const l=document.getElementById('loader-container');l&&(l.style.display='none',window.loaderInterval&&clearInterval(window.loaderInterval))}"
191
 
 
223
  gr.Markdown("### 3. Anteprima ed Editor")
224
  video_preview = gr.Video(label="Anteprima Video/Audio Originale", interactive=False)
225
  with gr.Group(visible=False) as audio_editor_group:
226
+ audio_output = gr.Audio(label="Editor Traccia Audio", editable=True, type="filepath")
 
 
 
 
 
 
 
 
 
 
227
  undo_audio_btn = gr.Button("↩️ Ripristina Audio Originale")
228
  final_video = gr.Video(label="Video Finale con Sottotitoli", interactive=False)
229
 
 
235
  merge_btn = gr.Button("🎬 Unisci al Video", variant="secondary")
236
  delete_btn = gr.Button("🗑️ Elimina", variant="stop")
237
  with gr.Accordion("Editor Testo Sottotitoli", open=False, visible=False) as srt_editor_accordion:
238
+ srt_editor_box = gr.Textbox(lines=15, label="Contenuto file .srt", show_copy_button=True, interactive=True)
239
  save_edit_btn = gr.Button("💾 Salva Modifiche", variant="primary")
240
 
241
  # --- FUNZIONI HELPER E LOGICA EVENTI ---
 
249
  return gr.update(visible=False, value=None), gr.update(visible=False), gr.update(interactive=False)
250
 
251
def on_select_srt(history_data, evt: gr.SelectData):
    """Handle a row selection in the history table.

    Args:
        history_data: list of history-entry dicts (gr.State value).
        evt: Gradio selection event; evt.index is (row, col) or None.

    Returns:
        A 3-tuple matching the wired outputs
        (selected_srt_path_state, action_buttons, srt_editor_accordion):
        the selected SRT path (or None), visibility of the action buttons,
        and visibility of the editor accordion.
    """
    # Bug fix: the no-selection and missing-file branches used to return
    # FOUR values while the success branch and the wired `outputs` list use
    # THREE components, making Gradio fail on those paths. All branches now
    # consistently return 3 values.
    if evt.index is None:
        return None, gr.update(visible=False), gr.update(visible=False)

    selected_entry = history_data[evt.index[0]]
    srt_path = selected_entry["Percorso Completo"]

    # The file may have been deleted since it was added to the history.
    if not os.path.exists(srt_path):
        gr.Warning("Il file SRT selezionato non esiste.")
        return None, gr.update(visible=False), gr.update(visible=False)

    return (
        srt_path,                  # selected SRT path
        gr.update(visible=True),   # show the action buttons
        gr.update(visible=False),  # keep the editor hidden initially
    )
269
+
270
def update_dataframe(history_list):
    """Project history entries into the DataFrame shown in the UI table.

    The "Video Unito" field is rendered as a check mark when a merged video
    exists and as an empty string otherwise; extra keys (full path, cost,
    elapsed time) are dropped by the final column selection.
    """
    table_columns = ["File SRT", "Libreria", "Orario Generazione", "Video Unito", "Orario Unione"]

    if not history_list:
        logging.debug("History list is empty. Returning empty dataframe.")
        return pd.DataFrame(columns=table_columns)

    # Shallow-copy each entry and replace the merged-video path with a flag.
    display_list = [
        {**entry, "Video Unito": "✔️" if entry.get("Video Unito") else ""}
        for entry in history_list
    ]

    logging.debug(f"Updated dataframe with entries: {display_list}")
    return pd.DataFrame(display_list)[table_columns]
285
 
286
  def delete_selected(history_data, srt_path_to_delete):
287
  if not srt_path_to_delete: gr.Warning("Nessun file selezionato."); return history_data, gr.update(visible=False)
 
302
  # --- CABLAGGIO EVENTI ---
303
 
304
  video_input.upload(fn=show_main_controls, inputs=video_input, outputs=[video_preview, main_panel, submit_btn])
 
 
 
 
305
 
306
# Extract the audio track from the uploaded video and open the audio editor.
extract_audio_btn.click(
    fn=extract_audio_only,
    inputs=[video_input],
    outputs=[audio_output, original_audio_path_state, audio_editor_group],
)

# Restore the untouched audio track saved when extraction ran.
undo_audio_btn.click(
    fn=lambda path: path,
    inputs=[original_audio_path_state],
    outputs=[audio_output],
)

# Show the OpenAI-specific options only when that backend is selected.
library_selector.change(
    lambda lib: gr.update(visible=lib == "OpenAI Whisper"),
    inputs=library_selector,
    outputs=openai_options,
)

# Run the transcription pipeline; outputs refresh the history state,
# the submit button and the history table.
submit_btn.click(
    fn=transcribe,
    inputs=[
        video_input,
        audio_output,
        library_selector,
        api_key_input,
        words_slider,
        srt_history_state,
    ],
    outputs=[srt_history_state, submit_btn, history_df],
)

# Event wiring for history_df: row selection drives the action buttons.
history_df.select(
    fn=on_select_srt,
    inputs=[srt_history_state],
    outputs=[
        selected_srt_path_state,  # path of the selected SRT file
        action_buttons,           # show the action buttons
        srt_editor_accordion      # hide the edit box initially
    ]
)

# Event wiring for edit_btn: load the selected SRT into the textbox...
edit_btn.click(
    fn=show_srt_for_editing,
    inputs=[selected_srt_path_state],
    outputs=[srt_editor_box]  # updates only the textbox content
)

# ...and a second handler on the same click opens the accordion.
edit_btn.click(
    fn=lambda: gr.update(visible=True),
    inputs=[],
    outputs=[srt_editor_accordion]  # make the accordion visible
)

# Event wiring for merge_btn: burn the selected subtitles into the video...
merge_btn.click(
    fn=merge_subtitles,
    inputs=[video_input, selected_srt_path_state],
    outputs=[final_video]  # updates only the final-video content
)

# ...and make the final-video component visible when clicked.
merge_btn.click(
    fn=lambda: gr.update(visible=True),
    inputs=[],
    outputs=[final_video]  # make the final-video component visible
)

# Event wiring for delete_btn: remove the selected SRT from the history.
delete_btn.click(
    fn=delete_selected,
    inputs=[srt_history_state, selected_srt_path_state],
    outputs=[srt_history_state, action_buttons]  # update history and hide action buttons
)
382
+
383
 
384
if __name__ == "__main__":
    # queue() enables request queuing/progress tracking; `share=True` was
    # removed so the app runs locally instead of opening a public tunnel.
    demo.queue().launch()
manifest.json CHANGED
@@ -1,3 +1,3 @@
1
  {
2
- "version": "1.0.0"
3
  }
 
1
  {
2
+ "version": "1.1.0"
3
  }
requirements.txt CHANGED
@@ -3,4 +3,4 @@ pandas
3
  faster-whisper
4
  moviepy==1.0.3
5
  openai
6
- ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
 
3
  faster-whisper
4
  moviepy==1.0.3
5
  openai
6
+ ffmpeg-python # Aggiunto per robustezza, anche se usiamo subprocess
src/__pycache__/subtitle_extractor.cpython-313.pyc ADDED
Binary file (7.08 kB). View file
 
src/main.py CHANGED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Simple CLI interface for the subtitle extractor."""
2
+
3
+ import argparse
4
+ import os
5
+ import tempfile
6
+
7
+ from .subtitle_extractor import (
8
+ extract_audio,
9
+ transcribe_audio,
10
+ save_srt,
11
+ merge_subtitles,
12
+ )
13
+
14
+
15
def main() -> None:
    """CLI entry point: extract audio, transcribe it and save the SRT.

    With ``--merge`` the generated subtitles are also burned into a copy of
    the video written alongside the SRT in the output directory.
    """
    parser = argparse.ArgumentParser(description="Generate subtitles from a video")
    parser.add_argument("video", help="Path to the video file")
    parser.add_argument("--library", choices=["faster_whisper", "OpenAI Whisper"], default="faster_whisper")
    parser.add_argument("--api-key", help="OpenAI API key if using OpenAI Whisper")
    parser.add_argument("--output", help="Output directory", default="output")
    parser.add_argument("--merge", action="store_true", help="Merge subtitles with video")
    args = parser.parse_args()

    # extract_audio creates the output directory, so save_srt can rely on it.
    audio_path = extract_audio(args.video, args.output)
    srt_content = transcribe_audio(audio_path, library=args.library, api_key=args.api_key)
    srt_path = save_srt(srt_content, os.path.join(args.output, "subtitles.srt"))
    print(f"Generated subtitles: {srt_path}")

    if args.merge:
        merged = merge_subtitles(args.video, srt_path, os.path.join(args.output, "merged.mp4"))
        print(f"Merged video saved to: {merged}")


if __name__ == "__main__":
    main()
src/subtitle_extractor.py CHANGED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Utility functions for extracting audio, transcribing and merging subtitles."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import subprocess
8
+ from dataclasses import dataclass
9
+ from typing import List, Optional
10
+
11
+ # MoviePy is an optional dependency used when extracting audio. It is imported
12
+ # lazily to avoid issues when running in environments where it is not
13
+ # available (for instance during unit tests).
14
+
15
+ try:
16
+ from faster_whisper import WhisperModel
17
+ except ImportError: # pragma: no cover - optional dependency
18
+ WhisperModel = None
19
+
20
+ logging.basicConfig(level=logging.DEBUG, format="%(asctime)s - %(levelname)s - %(message)s")
21
+
22
+
23
def format_timestamp(seconds: float) -> str:
    """Return *seconds* formatted as an SRT timestamp (HH:MM:SS,mmm).

    Rounds once at millisecond resolution. The previous implementation
    truncated the fractional part with int(), so an input like 1.234
    (stored as 1.2339999...) produced ",233" instead of ",234" and failed
    the accompanying unit test.
    """
    total_ms = round(seconds * 1000)
    h, rem = divmod(total_ms, 3_600_000)
    m, rem = divmod(rem, 60_000)
    s, ms = divmod(rem, 1000)
    return f"{h:02}:{m:02}:{s:02},{ms:03}"
30
+
31
+
32
def extract_audio(video_path: str, output_dir: str) -> str:
    """Extract the audio track of *video_path* into *output_dir*.

    Creates *output_dir* if needed and writes ``<video basename>.wav``.

    Returns:
        The path of the written WAV file.

    Raises:
        FileNotFoundError: when *video_path* does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(video_path)
    os.makedirs(output_dir, exist_ok=True)
    base_name = os.path.splitext(os.path.basename(video_path))[0]
    audio_path = os.path.join(output_dir, f"{base_name}.wav")
    # Import here so tests that do not require MoviePy can run without the
    # dependency installed.
    from moviepy.editor import VideoFileClip

    clip = VideoFileClip(video_path)
    # NOTE(review): clip.audio is None for silent videos, which would raise
    # AttributeError here — confirm inputs always carry an audio track.
    clip.audio.write_audiofile(audio_path, logger=None)
    clip.close()
    return audio_path
47
+
48
+
49
@dataclass
class SubtitleLine:
    """A single subtitle cue; times are seconds from the start of the audio."""

    start: float  # cue start time in seconds
    end: float  # cue end time in seconds
    text: str  # caption text (stripped when serialized to SRT)
54
+
55
+
56
def _segments_to_srt(segments: List[SubtitleLine]) -> str:
    """Serialize *segments* as SRT text: 1-based cue indices, each cue
    followed by a blank separator line."""
    entries = [
        f"{index}\n"
        f"{format_timestamp(segment.start)} --> {format_timestamp(segment.end)}\n"
        f"{segment.text.strip()}\n"
        for index, segment in enumerate(segments, 1)
    ]
    return "\n".join(entries)
64
+
65
+
66
def transcribe_audio(
    audio_path: str,
    library: str = "faster_whisper",
    api_key: Optional[str] = None,
    model_size: str = "base",
    words_per_sub: int = 7,
) -> str:
    """Transcribe *audio_path* and return SRT content.

    Args:
        audio_path: path of the audio file to transcribe.
        library: "OpenAI Whisper" to call the OpenAI API; any other value
            uses the local faster-whisper model.
        api_key: OpenAI API key; required when library == "OpenAI Whisper".
        model_size: faster-whisper model size (ignored by the OpenAI path).
        words_per_sub: words per cue when re-chunking OpenAI's plain text.

    Returns:
        The subtitles serialized as SRT text.

    Raises:
        ValueError: missing api_key, empty API result, or no segments.
        RuntimeError: faster_whisper is not installed for the local path.
    """
    logging.debug(f"Starting transcription with library: {library}, audio_path: {audio_path}")

    if library == "OpenAI Whisper":
        if api_key is None:
            raise ValueError("api_key is required for OpenAI Whisper")
        # Imported lazily so the module can be used without the openai package.
        import openai

        openai.api_key = api_key
        logging.debug("Calling OpenAI Whisper API...")
        # NOTE(review): openai.Audio.transcribe is the pre-1.0 API; with
        # openai>=1.0 this call raises — confirm the pinned openai version.
        with open(audio_path, "rb") as audio_file:
            result = openai.Audio.transcribe(
                model="whisper-1",
                file=audio_file,
                response_format="json",
            )
        logging.debug(f"OpenAI API response: {result}")
        words = result.get("text", "").split()
        if not words:
            logging.error("No text returned by OpenAI Whisper API.")
            raise ValueError("No text returned by OpenAI Whisper API.")
        # The API returned plain text with no timings, so cues are synthesized
        # on a fixed 3-second grid, words_per_sub words at a time.
        segments = []
        start = 0.0
        step = 3.0
        for i in range(0, len(words), words_per_sub):
            end = start + step
            text = " ".join(words[i : i + words_per_sub])
            segments.append(SubtitleLine(start=start, end=end, text=text))
            start = end
        logging.debug(f"Generated segments: {segments}")
    else:
        if WhisperModel is None:
            raise RuntimeError("faster_whisper is not installed")
        logging.debug("Using Faster Whisper for transcription...")
        model = WhisperModel(model_size)
        # transcribe() returns (segments, info); keep the model's real timings.
        segs = model.transcribe(audio_path)[0]
        segments = [SubtitleLine(start=s.start, end=s.end, text=s.text) for s in segs]
        logging.debug(f"Generated segments: {segments}")

    if not segments:
        logging.error("No segments generated during transcription.")
        raise ValueError("No segments generated during transcription.")

    srt_content = _segments_to_srt(segments)
    logging.debug(f"Generated SRT content: {srt_content}")
    return srt_content
119
+
120
+
121
def save_srt(content: str, output_path: str) -> str:
    """Write SRT *content* to *output_path* (UTF-8) and return the path."""
    with open(output_path, "w", encoding="utf-8") as srt_file:
        srt_file.write(content)
    return output_path
125
+
126
+
127
def merge_subtitles(video_path: str, srt_path: str, output_path: str) -> str:
    """Burn *srt_path* into *video_path* with ffmpeg, writing *output_path*.

    Re-encodes the video stream with libx264 (required by the subtitles
    filter) while copying the audio stream unchanged.

    Returns:
        *output_path* on success.

    Raises:
        subprocess.CalledProcessError: when ffmpeg exits non-zero (check=True).
    """
    # NOTE(review): the filter argument is not escaped; SRT paths containing
    # ':' or quote characters break the subtitles filter — confirm callers
    # only pass safe temp paths.
    command = [
        "ffmpeg",
        "-y",  # overwrite the output file without prompting
        "-i",
        video_path,
        "-vf",
        f"subtitles={srt_path}",
        "-c:a",
        "copy",
        "-c:v",
        "libx264",
        output_path,
    ]
    subprocess.run(command, check=True)
    return output_path
143
+
tests/test_subtitle_extractor.py CHANGED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys

# Make the repository root importable so `src.subtitle_extractor` resolves
# regardless of the directory pytest is invoked from.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

from src.subtitle_extractor import format_timestamp


def test_format_timestamp():
    # Zero boundary, millisecond rounding, and hour/minute carry cases.
    assert format_timestamp(0) == "00:00:00,000"
    assert format_timestamp(1.234) == "00:00:01,234"
    assert format_timestamp(3661.5) == "01:01:01,500"