Jhjoon05 committed on
Commit
8d692ce
·
1 Parent(s): 8708d98

make it able to handle multiple files

Browse files
Files changed (2) hide show
  1. app.py +1 -1
  2. modules/model_Inference.py +31 -20
app.py CHANGED
@@ -28,7 +28,7 @@ with block:
28
  with gr.Tabs():
29
  with gr.TabItem("File"): # tab1
30
  with gr.Row():
31
- input_file = gr.File(type="file", label="Upload File here")
32
  with gr.Row():
33
  dd_model = gr.Dropdown(choices=whisper_inf.available_models,value="large-v2",label="Model")
34
  dd_lang = gr.Dropdown(choices=["Automatic Detection"]+whisper_inf.available_langs,value="Automatic Detection",label="Language")
 
28
  with gr.Tabs():
29
  with gr.TabItem("File"): # tab1
30
  with gr.Row():
31
+ input_file = gr.Files(type="file", label="Upload File here")
32
  with gr.Row():
33
  dd_model = gr.Dropdown(choices=whisper_inf.available_models,value="large-v2",label="Model")
34
  dd_lang = gr.Dropdown(choices=["Automatic Detection"]+whisper_inf.available_langs,value="Automatic Detection",label="Language")
modules/model_Inference.py CHANGED
@@ -15,7 +15,7 @@ class WhisperInference():
15
  self.available_models = whisper.available_models()
16
  self.available_langs = sorted(list(whisper.tokenizer.LANGUAGES.values()))
17
 
18
- def transcribe_file(self,fileobj
19
  ,model_size,lang,subformat,istranslate,
20
  progress=gr.Progress()):
21
 
@@ -31,30 +31,41 @@ class WhisperInference():
31
  lang = None
32
 
33
  progress(0,desc="Loading Audio..")
34
- audio = whisper.load_audio(fileobj.name)
35
 
36
- translatable_model = ["large","large-v1","large-v2"]
37
- if istranslate and self.current_model_size in translatable_model:
38
- result = self.model.transcribe(audio=audio,language=lang,verbose=False,task="translate",progress_callback=progress_callback)
39
- else :
40
- result = self.model.transcribe(audio=audio,language=lang,verbose=False,progress_callback=progress_callback)
41
 
42
- progress(1,desc="Completed!")
 
 
 
 
43
 
44
- file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
45
- file_name = file_name[:-9]
46
- file_name = safe_filename(file_name)
47
- timestamp = datetime.now().strftime("%m%d%H%M%S")
48
- output_path = f"outputs/{file_name}-{timestamp}"
49
 
50
- if subformat == "SRT":
51
- subtitle = get_srt(result["segments"])
52
- write_srt(subtitle,f"{output_path}.srt")
53
- elif subformat == "WebVTT":
54
- subtitle = get_vtt(result["segments"])
55
- write_vtt(subtitle,f"{output_path}.vtt")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
- return f"Done! Subtitle is in the outputs folder.\n\n{subtitle}"
58
 
59
  def transcribe_youtube(self,youtubelink
60
  ,model_size,lang,subformat,istranslate,
 
15
  self.available_models = whisper.available_models()
16
  self.available_langs = sorted(list(whisper.tokenizer.LANGUAGES.values()))
17
 
18
+ def transcribe_file(self,fileobjs
19
  ,model_size,lang,subformat,istranslate,
20
  progress=gr.Progress()):
21
 
 
31
  lang = None
32
 
33
  progress(0,desc="Loading Audio..")
 
34
 
35
+ files_info = {}
36
+ for fileobj in fileobjs:
37
+ audio = whisper.load_audio(fileobj.name)
 
 
38
 
39
+ translatable_model = ["large","large-v1","large-v2"]
40
+ if istranslate and self.current_model_size in translatable_model:
41
+ result = self.model.transcribe(audio=audio,language=lang,verbose=False,task="translate",progress_callback=progress_callback)
42
+ else :
43
+ result = self.model.transcribe(audio=audio,language=lang,verbose=False,progress_callback=progress_callback)
44
 
45
+ progress(1,desc="Completed!")
 
 
 
 
46
 
47
+ file_name, file_ext = os.path.splitext(os.path.basename(fileobj.orig_name))
48
+ file_name = file_name[:-9]
49
+ file_name = safe_filename(file_name)
50
+ timestamp = datetime.now().strftime("%m%d%H%M%S")
51
+ output_path = f"outputs/{file_name}-{timestamp}"
52
+
53
+ if subformat == "SRT":
54
+ subtitle = get_srt(result["segments"])
55
+ write_srt(subtitle,f"{output_path}.srt")
56
+ elif subformat == "WebVTT":
57
+ subtitle = get_vtt(result["segments"])
58
+ write_vtt(subtitle,f"{output_path}.vtt")
59
+
60
+ files_info[file_name] = subtitle
61
+
62
+ total_result = ''
63
+ for file_name,subtitle in files_info.items():
64
+ total_result+='------------------------------------\n'
65
+ total_result+=f'{file_name}\n\n'
66
+ total_result+=f'{subtitle}'
67
 
68
+ return f"Done! Subtitle is in the outputs folder.\n\n{total_result}"
69
 
70
  def transcribe_youtube(self,youtubelink
71
  ,model_size,lang,subformat,istranslate,