Commit ec85714: "up"
bofenghuang committed (1 parent: 8aaccc9)

Files changed:
- README.md: +1 -1
- run_demo.py: +2 -2
- run_demo_multi_models.py: +3 -6
README.md CHANGED

@@ -1,5 +1,5 @@
 ---
-title: Whisper
+title: Whisper German Demo
 emoji: 🤫
 colorFrom: indigo
 colorTo: red
run_demo.py CHANGED

@@ -5,7 +5,7 @@ import pytube as pt
 from transformers import pipeline
 from huggingface_hub import model_info
 
-MODEL_NAME = "bofenghuang/whisper-medium-cv11-
+MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
 CHUNK_LENGTH_S = 30
 
 device = 0 if torch.cuda.is_available() else "cpu"
@@ -16,7 +16,7 @@ pipe = pipeline(
     device=device,
 )
 
-pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="
+pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="de", task="transcribe")
 
 def transcribe(microphone, file_upload):
     warn_output = ""
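For reference, below is a minimal sketch of how the updated settings in run_demo.py fit together after this commit. The model name, chunk length, and decoder prompt come from the diff above; the "automatic-speech-recognition" task string and the exact pipeline() keyword wiring are assumptions, since the diff only shows the changed lines and a few context lines.

import torch
from transformers import pipeline

MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
CHUNK_LENGTH_S = 30

device = 0 if torch.cuda.is_available() else "cpu"

# Build an ASR pipeline for the German Whisper checkpoint (task string assumed).
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=CHUNK_LENGTH_S,
    device=device,
)

# Force Whisper to transcribe in German rather than auto-detecting the language.
pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
    language="de", task="transcribe"
)

# Hypothetical usage: transcribe a local audio file.
# print(pipe("sample_de.wav")["text"])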
run_demo_multi_models.py CHANGED

@@ -11,15 +11,12 @@ from transformers.utils.logging import disable_progress_bar
 warnings.filterwarnings("ignore")
 disable_progress_bar()
 
-DEFAULT_MODEL_NAME = "bofenghuang/whisper-medium-cv11-
+DEFAULT_MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
 MODEL_NAMES = [
     "openai/whisper-small",
     "openai/whisper-medium",
     "openai/whisper-large-v2",
-    "bofenghuang/whisper-
-    "bofenghuang/whisper-small-cv11-french-punct",
-    "bofenghuang/whisper-medium-cv11-french",
-    "bofenghuang/whisper-medium-cv11-french-punct",
+    "bofenghuang/whisper-medium-cv11-german-punct",
 ]
 CHUNK_LENGTH_S = 30
 MAX_NEW_TOKENS = 225
@@ -48,7 +45,7 @@ def maybe_load_cached_pipeline(model_name):
         device=device,
     )
     # set forced_decoder_ids
-    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="
+    pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="de", task="transcribe")
     # limit genneration max length
     pipe.model.config.max_length = MAX_NEW_TOKENS + 1
 
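As context for the second hunk, here is a minimal sketch of how a maybe_load_cached_pipeline helper could apply these settings per model. The dict-based cache, the task string, and the pipeline() arguments are assumptions; only the forced_decoder_ids and max_length lines are shown in the diff itself.

import torch
from transformers import pipeline

DEFAULT_MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
CHUNK_LENGTH_S = 30
MAX_NEW_TOKENS = 225

device = 0 if torch.cuda.is_available() else "cpu"
cached_models = {}  # hypothetical cache, keyed by model name


def maybe_load_cached_pipeline(model_name):
    pipe = cached_models.get(model_name)
    if pipe is None:
        pipe = pipeline(
            task="automatic-speech-recognition",
            model=model_name,
            chunk_length_s=CHUNK_LENGTH_S,
            device=device,
        )
        # set forced_decoder_ids: always transcribe in German
        pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(
            language="de", task="transcribe"
        )
        # limit generation max length
        pipe.model.config.max_length = MAX_NEW_TOKENS + 1
        cached_models[model_name] = pipe
    return pipe


# Hypothetical usage: warm up the default model once at startup.
# maybe_load_cached_pipeline(DEFAULT_MODEL_NAME)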