Athspi committed
Commit 193965e · verified · 1 Parent(s): cf7e069

Update app.py

Files changed (1)
  1. app.py +70 -15
app.py CHANGED
@@ -4,17 +4,35 @@ import os
 from faster_whisper import WhisperModel
 from moviepy.video.io.VideoFileClip import VideoFileClip
 import logging
+import google.generativeai as genai
 
 # Suppress moviepy logs
 logging.getLogger("moviepy").setLevel(logging.ERROR)
 
-# Define the model and device
+# Configure Gemini API
+genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+
+# Create the Gemini model
+generation_config = {
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 40,
+    "max_output_tokens": 8192,
+    "response_mime_type": "text/plain",
+}
+
+model = genai.GenerativeModel(
+    model_name="gemini-2.0-flash-exp",
+    generation_config=generation_config,
+)
+
+# Define the Whisper model and device
 MODEL_NAME = "Systran/faster-whisper-large-v3"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 compute_type = "float32" if device == "cuda" else "int8"
 
 # Load the Whisper model
-model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
+whisper_model = WhisperModel(MODEL_NAME, device=device, compute_type=compute_type)
 
 # List of all supported languages in Whisper
 SUPPORTED_LANGUAGES = [
@@ -46,7 +64,7 @@ def extract_audio_from_video(video_file):
 def generate_subtitles(audio_file, language="Auto Detect"):
     """Generate subtitles from an audio file using Whisper."""
     # Transcribe the audio
-    segments, info = model.transcribe(
+    segments, info = whisper_model.transcribe(
         audio_file,
         task="transcribe",
         language=None if language == "Auto Detect" else language.lower(),
@@ -67,7 +85,7 @@ def generate_subtitles(audio_file, language="Auto Detect"):
         # Add to SRT
         srt_subtitles += f"{i}\n{start_time_srt} --> {end_time_srt}\n{text}\n\n"
 
-    return srt_subtitles
+    return srt_subtitles, info.language
 
 def format_timestamp(seconds):
     """Convert seconds to SRT timestamp format (HH:MM:SS,mmm)."""
@@ -77,23 +95,51 @@ def format_timestamp(seconds):
     milliseconds = int((seconds - int(seconds)) * 1000)
     return f"{hours:02}:{minutes:02}:{int(seconds):02},{milliseconds:03}"
 
-def process_video(video_file, language="Auto Detect"):
-    """Process a video file to generate subtitles."""
+def translate_text(text, target_language):
+    """Translate text using Gemini."""
+    prompt = f"Translate the following text into {target_language}:\n\n{text}"
+    response = model.generate_content(prompt)
+    return response.text
+
+def translate_srt(srt_text, target_language):
+    """Translate an SRT file while preserving timestamps."""
+    translated_srt = ""
+    for line in srt_text.split("\n"):
+        if " --> " in line:  # Timestamp line
+            translated_srt += line + "\n"
+        elif line.strip().isdigit():  # Subtitle index line
+            translated_srt += line + "\n"
+        elif line.strip():  # Text line
+            translated_srt += translate_text(line, target_language) + "\n"
+        else:  # Empty line
+            translated_srt += "\n"
+    return translated_srt
+
+def process_video(video_file, language="Auto Detect", translate_to=None):
+    """Process a video file to generate and translate subtitles."""
     # Extract audio from the video
     audio_file = extract_audio_from_video(video_file)
 
     # Generate subtitles
-    subtitles = generate_subtitles(audio_file, language)
+    subtitles, detected_language = generate_subtitles(audio_file, language)
 
-    # Save subtitles to an SRT file
-    srt_file = "subtitles.srt"
-    with open(srt_file, "w", encoding="utf-8") as f:
+    # Save original subtitles to an SRT file
+    original_srt_file = "original_subtitles.srt"
+    with open(original_srt_file, "w", encoding="utf-8") as f:
         f.write(subtitles)
 
+    # Translate subtitles if a target language is provided
+    translated_srt_file = None
+    if translate_to and translate_to != "None":
+        translated_subtitles = translate_srt(subtitles, translate_to)
+        translated_srt_file = "translated_subtitles.srt"
+        with open(translated_srt_file, "w", encoding="utf-8") as f:
+            f.write(translated_subtitles)
+
     # Clean up extracted audio file
     os.remove(audio_file)
 
-    return srt_file
+    return original_srt_file, translated_srt_file, detected_language
 
 # Define the Gradio interface
 with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
@@ -101,7 +147,7 @@ with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
     with gr.Column():
         gr.Markdown("# 🎥 AutoSubGen")
         gr.Markdown("### AI-Powered Video Subtitle Generator")
-        gr.Markdown("Automatically generate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
+        gr.Markdown("Automatically generate and translate subtitles for your videos in **SRT format**. Supports **100+ languages** and **auto-detection**.")
 
     # Main content
     with gr.Tab("Generate Subtitles"):
@@ -114,14 +160,23 @@ with gr.Blocks(title="AutoSubGen - AI Video Subtitle Generator") as demo:
                 value="Auto Detect",
                 scale=1
             )
+            translate_to_dropdown = gr.Dropdown(
+                choices=["None"] + SUPPORTED_LANGUAGES[1:],  # Exclude "Auto Detect"
+                label="Translate To",
+                value="None",
+                scale=1
+            )
             generate_button = gr.Button("Generate Subtitles", variant="primary")
-            subtitle_output = gr.File(label="Download Subtitles (SRT)")
+            with gr.Row():
+                original_subtitle_output = gr.File(label="Download Original Subtitles (SRT)")
+                translated_subtitle_output = gr.File(label="Download Translated Subtitles (SRT)")
+            detected_language_output = gr.Textbox(label="Detected Language")
 
         # Link button to function
         generate_button.click(
             process_video,
-            inputs=[video_input, language_dropdown],
-            outputs=subtitle_output
+            inputs=[video_input, language_dropdown, translate_to_dropdown],
+            outputs=[original_subtitle_output, translated_subtitle_output, detected_language_output]
         )
 
 # Launch the Gradio interface with a public link
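
For context on the translation path added by this commit: below is a minimal, self-contained sketch of the structure-preserving SRT walk that translate_srt performs. The Gemini-backed translate_text call is swapped for a stand-in so the sketch runs offline; fake_translate, translate_srt_sketch, and the sample string are illustrative only and are not part of app.py.

# Sketch of the commit's translate_srt logic with a stand-in translator.
def fake_translate(text, target_language):
    # Placeholder for the Gemini-backed translate_text() in app.py.
    return f"[{target_language}] {text}"

def translate_srt_sketch(srt_text, target_language, translate=fake_translate):
    """Translate only the text lines of an SRT block, passing subtitle
    indices, timestamp lines, and blank lines through unchanged."""
    out = []
    for line in srt_text.split("\n"):
        if " --> " in line or line.strip().isdigit() or not line.strip():
            out.append(line)  # keep structure lines as-is
        else:
            out.append(translate(line, target_language))
    return "\n".join(out)

sample = "1\n00:00:01,000 --> 00:00:02,500\nHello there.\n"
print(translate_srt_sketch(sample, "French"))
# 1
# 00:00:01,000 --> 00:00:02,500
# [French] Hello there.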