bofenghuang committed on
Commit
ec85714
1 Parent(s): 8aaccc9
Files changed (3) hide show
  1. README.md +1 -1
  2. run_demo.py +2 -2
  3. run_demo_multi_models.py +3 -6
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Whisper French Demo
3
  emoji: 🤫
4
  colorFrom: indigo
5
  colorTo: red
 
1
  ---
2
+ title: Whisper German Demo
3
  emoji: 🤫
4
  colorFrom: indigo
5
  colorTo: red
run_demo.py CHANGED
@@ -5,7 +5,7 @@ import pytube as pt
5
  from transformers import pipeline
6
  from huggingface_hub import model_info
7
 
8
- MODEL_NAME = "bofenghuang/whisper-medium-cv11-french-punct"
9
  CHUNK_LENGTH_S = 30
10
 
11
  device = 0 if torch.cuda.is_available() else "cpu"
@@ -16,7 +16,7 @@ pipe = pipeline(
16
  device=device,
17
  )
18
 
19
- pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="fr", task="transcribe")
20
 
21
  def transcribe(microphone, file_upload):
22
  warn_output = ""
 
5
  from transformers import pipeline
6
  from huggingface_hub import model_info
7
 
8
+ MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
9
  CHUNK_LENGTH_S = 30
10
 
11
  device = 0 if torch.cuda.is_available() else "cpu"
 
16
  device=device,
17
  )
18
 
19
+ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="de", task="transcribe")
20
 
21
  def transcribe(microphone, file_upload):
22
  warn_output = ""
run_demo_multi_models.py CHANGED
@@ -11,15 +11,12 @@ from transformers.utils.logging import disable_progress_bar
11
  warnings.filterwarnings("ignore")
12
  disable_progress_bar()
13
 
14
- DEFAULT_MODEL_NAME = "bofenghuang/whisper-medium-cv11-french-punct"
15
  MODEL_NAMES = [
16
  "openai/whisper-small",
17
  "openai/whisper-medium",
18
  "openai/whisper-large-v2",
19
- "bofenghuang/whisper-small-cv11-french",
20
- "bofenghuang/whisper-small-cv11-french-punct",
21
- "bofenghuang/whisper-medium-cv11-french",
22
- "bofenghuang/whisper-medium-cv11-french-punct",
23
  ]
24
  CHUNK_LENGTH_S = 30
25
  MAX_NEW_TOKENS = 225
@@ -48,7 +45,7 @@ def maybe_load_cached_pipeline(model_name):
48
  device=device,
49
  )
50
  # set forced_decoder_ids
51
- pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="fr", task="transcribe")
52
  # limit genneration max length
53
  pipe.model.config.max_length = MAX_NEW_TOKENS + 1
54
 
 
11
  warnings.filterwarnings("ignore")
12
  disable_progress_bar()
13
 
14
+ DEFAULT_MODEL_NAME = "bofenghuang/whisper-medium-cv11-german-punct"
15
  MODEL_NAMES = [
16
  "openai/whisper-small",
17
  "openai/whisper-medium",
18
  "openai/whisper-large-v2",
19
+ "bofenghuang/whisper-medium-cv11-german-punct",
 
 
 
20
  ]
21
  CHUNK_LENGTH_S = 30
22
  MAX_NEW_TOKENS = 225
 
45
  device=device,
46
  )
47
  # set forced_decoder_ids
48
+ pipe.model.config.forced_decoder_ids = pipe.tokenizer.get_decoder_prompt_ids(language="de", task="transcribe")
49
  # limit genneration max length
50
  pipe.model.config.max_length = MAX_NEW_TOKENS + 1
51