nunenuh commited on
Commit
7640294
1 Parent(s): 861e352

feat: add preloader for efficient load time

Browse files
Files changed (3) hide show
  1. app.py +6 -3
  2. src/infer.py +13 -5
  3. src/utils.py +4 -4
app.py CHANGED
@@ -1,9 +1,9 @@
1
  from src import infer, utils
2
  import gradio as gr
3
 
4
- infer.download_all_available_model()
5
 
6
- audio_examples = [
7
  [None, "assets/audio/male-indonesian.wav", None],
8
  [None, "assets/audio/female-indonesian.wav", None],
9
  [None, "assets/audio/male-english.wav", None],
@@ -19,7 +19,10 @@ demo = gr.Interface(
19
  inputs=[
20
  gr.Dropdown(
21
  label="Model",
22
- choices=["tiny","small","base","medium","large","large-v2"],
 
 
 
23
  value="base"),
24
  gr.Radio(label="Language",
25
  choices=["indonesian","english"],
 
1
  from src import infer, utils
2
  import gradio as gr
3
 
4
+ infer.model_preloader_downloader()
5
 
6
+ AUDIO_EXAMPLE = [
7
  [None, "assets/audio/male-indonesian.wav", None],
8
  [None, "assets/audio/female-indonesian.wav", None],
9
  [None, "assets/audio/male-english.wav", None],
 
19
  inputs=[
20
  gr.Dropdown(
21
  label="Model",
22
+ choices=[
23
+ "tiny", "base", "small", "medium",
24
+ "large", "large-v1", "large-v2"
25
+ ],
26
  value="base"),
27
  gr.Radio(label="Language",
28
  choices=["indonesian","english"],
src/infer.py CHANGED
@@ -2,15 +2,23 @@
2
  from typing import *
3
  from src import utils
4
  import whisper
 
5
 
 
 
6
 
7
- def download_all_available_model():
8
- list_names = ["tiny", "base", "small", "medium", "large", "large-v1","large-v2"]
9
- for mname in list_names:
10
  mdl = whisper.load_model(mname)
11
  del mdl
12
 
13
- def predict(model_name, language, mic_audio=None, audio=None):
 
 
 
 
 
 
14
  if mic_audio is not None:
15
  voice = mic_audio
16
  elif audio is not None:
@@ -20,6 +28,6 @@ def predict(model_name, language, mic_audio=None, audio=None):
20
 
21
  voice = utils.preprocess_audio(voice)
22
 
23
- model = whisper.load_model(model_name)
24
  result = model.transcribe(voice, language=language)
25
  return result["text"]
 
2
  from typing import *
3
  from src import utils
4
  import whisper
5
+
6
 
7
+ MODEL_NAMES = ["tiny", "base", "small", "medium", "large", "large-v1","large-v2"]
8
+ MODEL_BASE = whisper.load_model("base")
9
 
10
+ def model_preloader_downloader():
11
+ for mname in MODEL_NAMES:
 
12
  mdl = whisper.load_model(mname)
13
  del mdl
14
 
15
+ def model_loader(name: str):
16
+ if name=="base":
17
+ return MODEL_BASE
18
+ else:
19
+ return whisper.load_model(name)
20
+
21
+ def predict(model_name: str, language: str, mic_audio=None, audio=None):
22
  if mic_audio is not None:
23
  voice = mic_audio
24
  elif audio is not None:
 
28
 
29
  voice = utils.preprocess_audio(voice)
30
 
31
+ model = model_loader(model_name)
32
  result = model.transcribe(voice, language=language)
33
  return result["text"]
src/utils.py CHANGED
@@ -1,11 +1,10 @@
1
  from pathlib import Path
2
  import whisper
 
3
 
4
- sample_rate: int = 16000
5
- float_factor: float = 32678.0
6
 
7
 
8
- def preprocess_audio(filepath: str):
9
  # load audio and pad/trim it to fit 30 seconds
10
  audio = whisper.load_audio(filepath)
11
  audio = whisper.pad_or_trim(audio)
@@ -18,4 +17,5 @@ def parsing_text(filepath: str):
18
  raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
19
 
20
  return path.read_text()
21
-
 
 
1
  from pathlib import Path
2
  import whisper
3
+ from typing import Any
4
 
 
 
5
 
6
 
7
+ def preprocess_audio(filepath: str) -> Any:
8
  # load audio and pad/trim it to fit 30 seconds
9
  audio = whisper.load_audio(filepath)
10
  audio = whisper.pad_or_trim(audio)
 
17
  raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
18
 
19
  return path.read_text()
20
+
21
+