Spaces:
Sleeping
Sleeping
1. Handling "save_downloaded_files" and "merge_subtitle_with_sources" Options during Transcription Process:
Browse files
Previously, the "save_downloaded_files" and "merge_subtitle_with_sources" options were processed only at the very end, after all files had been transcribed. They are now processed immediately after each file is transcribed, so when multiple files are input you can see the results for each file sooner.
2. Aligned the format of saved audio or video file names with that of the subtitle file names when the "save_downloaded_files" or "merge_subtitle_with_sources" option is enabled.
3. Added "input_max_file_name_length" Option: This option sets the maximum length of output file names (default: 100). It can be configured when launching the app.
- app.py +33 -29
- src/source.py +7 -2
- src/utils.py +5 -3
- src/vadParallel.py +2 -2
app.py
CHANGED
@@ -220,12 +220,41 @@ class WhisperTranscriber:
|
|
220 |
|
221 |
# Transcribe
|
222 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
223 |
-
|
|
|
224 |
|
225 |
# Update progress
|
226 |
current_progress += source_audio_duration
|
227 |
|
228 |
-
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix, outputDirectory, highlight_words, scaled_progress_listener)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
229 |
|
230 |
if len(sources) > 1:
|
231 |
# Add new line separators
|
@@ -272,33 +301,6 @@ class WhisperTranscriber:
|
|
272 |
# Cleanup source
|
273 |
if self.deleteUploadedFiles:
|
274 |
for source in sources:
|
275 |
-
if self.app_config.merge_subtitle_with_sources and self.app_config.output_dir is not None and len(source_download) > 0:
|
276 |
-
print("\nmerge subtitle(srt) with source file [" + source.source_name + "]\n")
|
277 |
-
outRsult = ""
|
278 |
-
try:
|
279 |
-
srt_path = source_download[0]
|
280 |
-
save_path = os.path.join(self.app_config.output_dir, source.source_name)
|
281 |
-
save_without_ext, ext = os.path.splitext(save_path)
|
282 |
-
source_lang = "." + whisper_lang.code if whisper_lang is not None else ""
|
283 |
-
translate_lang = "." + nllb_lang.code if nllb_lang is not None else ""
|
284 |
-
output_with_srt = save_without_ext + source_lang + translate_lang + ext
|
285 |
-
|
286 |
-
#ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
|
287 |
-
input_file = ffmpeg.input(source.source_path)
|
288 |
-
input_srt = ffmpeg.input(srt_path)
|
289 |
-
out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
|
290 |
-
outRsult = out.run(overwrite_output=True)
|
291 |
-
except Exception as e:
|
292 |
-
# Ignore error - it's just a cleanup
|
293 |
-
print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
|
294 |
-
elif self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
|
295 |
-
print("Saving downloaded file [" + source.source_name + "]")
|
296 |
-
try:
|
297 |
-
shutil.copy(source.source_path, self.app_config.output_dir)
|
298 |
-
except Exception as e:
|
299 |
-
# Ignore error - it's just a cleanup
|
300 |
-
print("Error saving downloaded file: \n" + source.source_path + ", \n" + str(e))
|
301 |
-
|
302 |
print("Deleting temporary source file: " + source.source_path)
|
303 |
try:
|
304 |
os.remove(source.source_path)
|
@@ -765,6 +767,8 @@ if __name__ == '__main__':
|
|
765 |
help="True to move downloaded files to outputs directory. This argument will take effect only after output_dir is set.")
|
766 |
parser.add_argument("--merge_subtitle_with_sources", action='store_true', \
|
767 |
help="True to merge subtitle(srt) with sources and move the sources files to the outputs directory. This argument will take effect only after output_dir is set.")
|
|
|
|
|
768 |
parser.add_argument("--autolaunch", action='store_true', \
|
769 |
help="open the webui URL in the system's default browser upon launch")
|
770 |
|
|
|
220 |
|
221 |
# Transcribe
|
222 |
result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
|
223 |
+
short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
|
224 |
+
filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
|
225 |
|
226 |
# Update progress
|
227 |
current_progress += source_audio_duration
|
228 |
|
229 |
+
source_download, source_text, source_vtt = self.write_result(result, nllb_model, filePrefix + suffix.replace(".", "_"), outputDirectory, highlight_words, scaled_progress_listener)
|
230 |
+
|
231 |
+
if self.app_config.merge_subtitle_with_sources and self.app_config.output_dir is not None:
|
232 |
+
print("\nmerge subtitle(srt) with source file [" + source.source_name + "]\n")
|
233 |
+
outRsult = ""
|
234 |
+
try:
|
235 |
+
srt_path = source_download[0]
|
236 |
+
save_path = os.path.join(self.app_config.output_dir, filePrefix)
|
237 |
+
# save_without_ext, ext = os.path.splitext(save_path)
|
238 |
+
source_lang = "." + whisper_lang.code if whisper_lang is not None else ""
|
239 |
+
translate_lang = "." + nllb_lang.code if nllb_lang is not None else ""
|
240 |
+
output_with_srt = save_path + source_lang + translate_lang + suffix
|
241 |
+
|
242 |
+
#ffmpeg -i "input.mp4" -i "input.srt" -c copy -c:s mov_text output.mp4
|
243 |
+
input_file = ffmpeg.input(source.source_path)
|
244 |
+
input_srt = ffmpeg.input(srt_path)
|
245 |
+
out = ffmpeg.output(input_file, input_srt, output_with_srt, vcodec='copy', acodec='copy', scodec='mov_text')
|
246 |
+
outRsult = out.run(overwrite_output=True)
|
247 |
+
except Exception as e:
|
248 |
+
# Ignore error - it's just a cleanup
|
249 |
+
print("Error merge subtitle with source file: \n" + source.source_path + ", \n" + str(e), outRsult)
|
250 |
+
elif self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
|
251 |
+
print("Saving downloaded file [" + source.source_name + "]")
|
252 |
+
try:
|
253 |
+
save_path = os.path.join(self.app_config.output_dir, filePrefix)
|
254 |
+
shutil.copy(source.source_path, save_path + suffix)
|
255 |
+
except Exception as e:
|
256 |
+
# Ignore error - it's just a cleanup
|
257 |
+
print("Error saving downloaded file: \n" + source.source_path + ", \n" + str(e))
|
258 |
|
259 |
if len(sources) > 1:
|
260 |
# Add new line separators
|
|
|
301 |
# Cleanup source
|
302 |
if self.deleteUploadedFiles:
|
303 |
for source in sources:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
304 |
print("Deleting temporary source file: " + source.source_path)
|
305 |
try:
|
306 |
os.remove(source.source_path)
|
|
|
767 |
help="True to move downloaded files to outputs directory. This argument will take effect only after output_dir is set.")
|
768 |
parser.add_argument("--merge_subtitle_with_sources", action='store_true', \
|
769 |
help="True to merge subtitle(srt) with sources and move the sources files to the outputs directory. This argument will take effect only after output_dir is set.")
|
770 |
+
parser.add_argument("--input_max_file_name_length", type=int, default=100, \
|
771 |
+
help="Maximum length of a file name.")
|
772 |
parser.add_argument("--autolaunch", action='store_true', \
|
773 |
help="open the webui URL in the system's default browser upon launch")
|
774 |
|
src/source.py
CHANGED
@@ -32,10 +32,15 @@ class AudioSource:
|
|
32 |
return self.source_name
|
33 |
|
34 |
def get_short_name(self, max_length: int = MAX_FILE_PREFIX_LENGTH):
|
|
|
|
|
|
|
|
|
|
|
35 |
file_path = pathlib.Path(self.source_name)
|
36 |
-
short_name = file_path.stem[:max_length]
|
37 |
|
38 |
-
return short_name
|
39 |
|
40 |
def __str__(self) -> str:
|
41 |
return self.source_path
|
|
|
32 |
return self.source_name
|
33 |
|
34 |
def get_short_name(self, max_length: int = MAX_FILE_PREFIX_LENGTH):
|
35 |
+
short_name, suffix = self.get_short_name_suffix(max_length=max_length)
|
36 |
+
|
37 |
+
return short_name + suffix
|
38 |
+
|
39 |
+
def get_short_name_suffix(self, max_length: int = MAX_FILE_PREFIX_LENGTH):
|
40 |
file_path = pathlib.Path(self.source_name)
|
41 |
+
short_name = file_path.stem[:max_length]
|
42 |
|
43 |
+
return short_name, file_path.suffix
|
44 |
|
45 |
def __str__(self) -> str:
|
46 |
return self.source_path
|
src/utils.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
import textwrap
|
2 |
import unicodedata
|
3 |
import re
|
4 |
|
@@ -211,7 +211,7 @@ def process_text(text: str, maxLineWidth=None):
|
|
211 |
lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
|
212 |
return '\n'.join(lines)
|
213 |
|
214 |
-
def slugify(value, allow_unicode=False):
|
215 |
"""
|
216 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
217 |
Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
|
@@ -224,7 +224,9 @@ def slugify(value, allow_unicode=False):
|
|
224 |
value = unicodedata.normalize('NFKC', value)
|
225 |
else:
|
226 |
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
|
227 |
-
|
|
|
|
|
228 |
return re.sub(r'[-\s]+', '-', value).strip('-_')
|
229 |
|
230 |
def download_file(url: str, destination: str):
|
|
|
1 |
+
import textwrap
|
2 |
import unicodedata
|
3 |
import re
|
4 |
|
|
|
211 |
lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
|
212 |
return '\n'.join(lines)
|
213 |
|
214 |
+
def slugify(value, allow_unicode=False, is_lower=False):
|
215 |
"""
|
216 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
217 |
Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
|
|
|
224 |
value = unicodedata.normalize('NFKC', value)
|
225 |
else:
|
226 |
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
|
227 |
+
if is_lower:
|
228 |
+
value = value.lower()
|
229 |
+
value = re.sub(r'[^\w\s-]', '', value.replace("/","_").replace("⧸","_"))
|
230 |
return re.sub(r'[-\s]+', '-', value).strip('-_')
|
231 |
|
232 |
def download_file(url: str, destination: str):
|
src/vadParallel.py
CHANGED
@@ -180,13 +180,13 @@ class ParallelTranscription(AbstractTranscription):
|
|
180 |
total_progress += delta
|
181 |
if progress_listener is not None:
|
182 |
idx+=1
|
183 |
-
progress_listener.on_progress(total_progress, total_duration, desc=f"Transcribe parallel: {idx}, {total_progress:.2f}/{total_duration}")
|
184 |
|
185 |
results = results_async.get()
|
186 |
|
187 |
# Call the finished callback
|
188 |
if progress_listener is not None:
|
189 |
-
progress_listener.on_finished(desc=f"Transcribe parallel: {idx}, {total_progress:.2f}/{total_duration}.")
|
190 |
|
191 |
for result in results:
|
192 |
# Merge the results
|
|
|
180 |
total_progress += delta
|
181 |
if progress_listener is not None:
|
182 |
idx+=1
|
183 |
+
progress_listener.on_progress(total_progress, total_duration, desc=f"Transcribe parallel: {idx}, {total_progress:.2f}/{total_duration:.2f}")
|
184 |
|
185 |
results = results_async.get()
|
186 |
|
187 |
# Call the finished callback
|
188 |
if progress_listener is not None:
|
189 |
+
progress_listener.on_finished(desc=f"Transcribe parallel: {idx}, {total_progress:.2f}/{total_duration:.2f}.")
|
190 |
|
191 |
for result in results:
|
192 |
# Merge the results
|