Spaces:
Runtime error
Runtime error
feat: add preloader for time efficiency
Browse files
- app.py +6 -3
- src/infer.py +13 -5
- src/utils.py +4 -4
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
from src import infer, utils
|
2 |
import gradio as gr
|
3 |
|
4 |
-
infer.
|
5 |
|
6 |
-
|
7 |
[None, "assets/audio/male-indonesian.wav", None],
|
8 |
[None, "assets/audio/female-indonesian.wav", None],
|
9 |
[None, "assets/audio/male-english.wav", None],
|
@@ -19,7 +19,10 @@ demo = gr.Interface(
|
|
19 |
inputs=[
|
20 |
gr.Dropdown(
|
21 |
label="Model",
|
22 |
-
choices=[
|
|
|
|
|
|
|
23 |
value="base"),
|
24 |
gr.Radio(label="Language",
|
25 |
choices=["indonesian","english"],
|
|
|
1 |
from src import infer, utils
|
2 |
import gradio as gr
|
3 |
|
4 |
+
infer.model_preloader_downloader()
|
5 |
|
6 |
+
AUDIO_EXAMPLE = [
|
7 |
[None, "assets/audio/male-indonesian.wav", None],
|
8 |
[None, "assets/audio/female-indonesian.wav", None],
|
9 |
[None, "assets/audio/male-english.wav", None],
|
|
|
19 |
inputs=[
|
20 |
gr.Dropdown(
|
21 |
label="Model",
|
22 |
+
choices=[
|
23 |
+
"tiny", "base", "small", "medium",
|
24 |
+
"large", "large-v1", "large-v2"
|
25 |
+
],
|
26 |
value="base"),
|
27 |
gr.Radio(label="Language",
|
28 |
choices=["indonesian","english"],
|
src/infer.py
CHANGED
@@ -2,15 +2,23 @@
|
|
2 |
from typing import *
|
3 |
from src import utils
|
4 |
import whisper
|
|
|
5 |
|
|
|
|
|
6 |
|
7 |
-
def
|
8 |
-
|
9 |
-
for mname in list_names:
|
10 |
mdl = whisper.load_model(mname)
|
11 |
del mdl
|
12 |
|
13 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
if mic_audio is not None:
|
15 |
voice = mic_audio
|
16 |
elif audio is not None:
|
@@ -20,6 +28,6 @@ def predict(model_name, language, mic_audio=None, audio=None):
|
|
20 |
|
21 |
voice = utils.preprocess_audio(voice)
|
22 |
|
23 |
-
model =
|
24 |
result = model.transcribe(voice, language=language)
|
25 |
return result["text"]
|
|
|
2 |
from typing import *
|
3 |
from src import utils
|
4 |
import whisper
|
5 |
+
|
6 |
|
7 |
+
MODEL_NAMES = ["tiny", "base", "small", "medium", "large", "large-v1","large-v2"]
|
8 |
+
MODEL_BASE = whisper.load_model("base")
|
9 |
|
10 |
+
def model_preloader_downloader():
|
11 |
+
for mname in MODEL_NAMES:
|
|
|
12 |
mdl = whisper.load_model(mname)
|
13 |
del mdl
|
14 |
|
15 |
+
def model_loader(name: str):
|
16 |
+
if name=="base":
|
17 |
+
return MODEL_BASE
|
18 |
+
else:
|
19 |
+
return whisper.load_model(name)
|
20 |
+
|
21 |
+
def predict(model_name: str, language: str, mic_audio=None, audio=None):
|
22 |
if mic_audio is not None:
|
23 |
voice = mic_audio
|
24 |
elif audio is not None:
|
|
|
28 |
|
29 |
voice = utils.preprocess_audio(voice)
|
30 |
|
31 |
+
model = model_loader(model_name)
|
32 |
result = model.transcribe(voice, language=language)
|
33 |
return result["text"]
|
src/utils.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
from pathlib import Path
|
2 |
import whisper
|
|
|
3 |
|
4 |
-
sample_rate: int = 16000
|
5 |
-
float_factor: float = 32678.0
|
6 |
|
7 |
|
8 |
-
def preprocess_audio(filepath: str):
|
9 |
# load audio and pad/trim it to fit 30 seconds
|
10 |
audio = whisper.load_audio(filepath)
|
11 |
audio = whisper.pad_or_trim(audio)
|
@@ -18,4 +17,5 @@ def parsing_text(filepath: str):
|
|
18 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
19 |
|
20 |
return path.read_text()
|
21 |
-
|
|
|
|
1 |
from pathlib import Path
|
2 |
import whisper
|
3 |
+
from typing import Any
|
4 |
|
|
|
|
|
5 |
|
6 |
|
7 |
+
def preprocess_audio(filepath: str) -> Any:
|
8 |
# load audio and pad/trim it to fit 30 seconds
|
9 |
audio = whisper.load_audio(filepath)
|
10 |
audio = whisper.pad_or_trim(audio)
|
|
|
17 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
18 |
|
19 |
return path.read_text()
|
20 |
+
|
21 |
+
|