asahi417 committed on
Commit
d90b22e
•
1 Parent(s): c30e8c2

Upload KotobaWhisperPipeline

Browse files
Files changed (4) hide show
  1. README.md +44 -47
  2. config.json +1 -1
  3. generation_config.json +1 -1
  4. kotoba_whisper.py +1 -1
README.md CHANGED
@@ -1,62 +1,59 @@
1
  ---
2
- license: apache-2.0
3
  language: ja
 
4
  tags:
5
  - audio
6
  - automatic-speech-recognition
7
  - hf-asr-leaderboard
 
 
8
  widget:
9
  - example_title: CommonVoice 8.0 (Test Split)
10
- src: >-
11
- https://huggingface.co/datasets/japanese-asr/ja_asr.common_voice_8_0/resolve/main/sample.flac
12
  - example_title: JSUT Basic 5000
13
- src: >-
14
- https://huggingface.co/datasets/japanese-asr/ja_asr.jsut_basic5000/resolve/main/sample.flac
15
  - example_title: ReazonSpeech (Test Split)
16
- src: >-
17
- https://huggingface.co/datasets/japanese-asr/ja_asr.reazonspeech_test/resolve/main/sample.flac
18
  pipeline_tag: automatic-speech-recognition
19
- metrics:
20
- - wer
21
  model-index:
22
- - name: kotoba-tech/kotoba-whisper-v1.0
23
- results:
24
- - task:
25
- type: automatic-speech-recognition
26
- dataset:
27
- name: CommonVoice_8.0 (Japanese)
28
- type: japanese-asr/ja_asr.common_voice_8_0
29
- metrics:
30
- - name: WER
31
- type: WER
32
- value: 59.27
33
- - name: CER
34
- type: CER
35
- value: 9.44
36
- - task:
37
- type: automatic-speech-recognition
38
- dataset:
39
- name: ReazonSpeech (Test)
40
- type: japanese-asr/ja_asr.reazonspeech_test
41
- metrics:
42
- - name: WER
43
- type: WER
44
- value: 56.62
45
- - name: CER
46
- type: CER
47
- value: 12.6
48
- - task:
49
- type: automatic-speech-recognition
50
- dataset:
51
- name: JSUT Basic5000
52
- type: japanese-asr/ja_asr.jsut_basic5000
53
- metrics:
54
- - name: WER
55
- type: WER
56
- value: 64.36
57
- - name: CER
58
- type: CER
59
- value: 8.48
60
  ---
61
 
62
  # Kotoba-Whisper-v1.1
 
1
  ---
 
2
  language: ja
3
+ license: apache-2.0
4
  tags:
5
  - audio
6
  - automatic-speech-recognition
7
  - hf-asr-leaderboard
8
+ metrics:
9
+ - wer
10
  widget:
11
  - example_title: CommonVoice 8.0 (Test Split)
12
+ src: https://huggingface.co/datasets/japanese-asr/ja_asr.common_voice_8_0/resolve/main/sample.flac
 
13
  - example_title: JSUT Basic 5000
14
+ src: https://huggingface.co/datasets/japanese-asr/ja_asr.jsut_basic5000/resolve/main/sample.flac
 
15
  - example_title: ReazonSpeech (Test Split)
16
+ src: https://huggingface.co/datasets/japanese-asr/ja_asr.reazonspeech_test/resolve/main/sample.flac
 
17
  pipeline_tag: automatic-speech-recognition
 
 
18
  model-index:
19
+ - name: kotoba-tech/kotoba-whisper-v1.0
20
+ results:
21
+ - task:
22
+ type: automatic-speech-recognition
23
+ dataset:
24
+ name: CommonVoice_8.0 (Japanese)
25
+ type: japanese-asr/ja_asr.common_voice_8_0
26
+ metrics:
27
+ - type: WER
28
+ value: 59.27
29
+ name: WER
30
+ - type: CER
31
+ value: 9.44
32
+ name: CER
33
+ - task:
34
+ type: automatic-speech-recognition
35
+ dataset:
36
+ name: ReazonSpeech (Test)
37
+ type: japanese-asr/ja_asr.reazonspeech_test
38
+ metrics:
39
+ - type: WER
40
+ value: 56.62
41
+ name: WER
42
+ - type: CER
43
+ value: 12.6
44
+ name: CER
45
+ - task:
46
+ type: automatic-speech-recognition
47
+ dataset:
48
+ name: JSUT Basic5000
49
+ type: japanese-asr/ja_asr.jsut_basic5000
50
+ metrics:
51
+ - type: WER
52
+ value: 64.36
53
+ name: WER
54
+ - type: CER
55
+ value: 8.48
56
+ name: CER
57
  ---
58
 
59
  # Kotoba-Whisper-v1.1
config.json CHANGED
@@ -54,7 +54,7 @@
54
  "pad_token_id": 50256,
55
  "scale_embedding": false,
56
  "torch_dtype": "float32",
57
- "transformers_version": "4.41.0.dev0",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51866
 
54
  "pad_token_id": 50256,
55
  "scale_embedding": false,
56
  "torch_dtype": "float32",
57
+ "transformers_version": "4.40.1",
58
  "use_cache": true,
59
  "use_weighted_layer_sum": false,
60
  "vocab_size": 51866
generation_config.json CHANGED
@@ -261,5 +261,5 @@
261
  "transcribe": 50360,
262
  "translate": 50359
263
  },
264
- "transformers_version": "4.41.0.dev0"
265
  }
 
261
  "transcribe": 50360,
262
  "translate": 50359
263
  },
264
+ "transformers_version": "4.40.1"
265
  }
kotoba_whisper.py CHANGED
@@ -24,7 +24,7 @@ class Punctuator:
24
  def punctuate(self, pipeline_chunk: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
25
 
26
  def validate_punctuation(raw: str, punctuated: str):
27
- if 'unk' in punctuated:
28
  return raw
29
  if punctuated.count("。") > 1:
30
  ind = punctuated.rfind("。")
 
24
  def punctuate(self, pipeline_chunk: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
25
 
26
  def validate_punctuation(raw: str, punctuated: str):
27
+ if 'unk' in punctuated.lower() or any(p in raw for p in self.ja_punctuations):
28
  return raw
29
  if punctuated.count("。") > 1:
30
  ind = punctuated.rfind("。")