patrickvonplaten committed on
Commit
ded564d
•
1 Parent(s): bdbf2fe

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -2
README.md CHANGED
@@ -37,10 +37,11 @@ import librosa
37
  from datasets import load_dataset
38
  import MeCab
39
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
40
 
41
  # config
42
  wakati = MeCab.Tagger("-Owakati")
43
- chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\、\\\\\\\\。\\\\\\\\.\\\\\\\\「\\\\\\\\」\\\\\\\\…\\\\\\\\?\\\\\\\\・]'
44
 
45
  # load data, processor and model
46
  test_dataset = load_dataset("common_voice", "ja", split="test[:2%]")
@@ -76,10 +77,11 @@ import torchaudio
76
  from datasets import load_dataset, load_metric
77
  import MeCab
78
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
79
 
80
  #config
81
  wakati = MeCab.Tagger("-Owakati")
82
- chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\、\\\\\\\\。\\\\\\\\.\\\\\\\\「\\\\\\\\」\\\\\\\\…\\\\\\\\?\\\\\\\\・]'
83
 
84
  # load data, processor and model
85
  test_dataset = load_dataset("common_voice", "ja", split="test")
 
37
  from datasets import load_dataset
38
  import MeCab
39
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
40
+ import re
41
 
42
  # config
43
  wakati = MeCab.Tagger("-Owakati")
44
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\、\\\\\\\\\\\\\\\\。\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\「\\\\\\\\\\\\\\\\」\\\\\\\\\\\\\\\\…\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\・]'
45
 
46
  # load data, processor and model
47
  test_dataset = load_dataset("common_voice", "ja", split="test[:2%]")
 
77
  from datasets import load_dataset, load_metric
78
  import MeCab
79
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
80
+ import re
81
 
82
  #config
83
  wakati = MeCab.Tagger("-Owakati")
84
+ chars_to_ignore_regex = '[\\\\\\\\\\\\\\\\,\\\\\\\\\\\\\\\\、\\\\\\\\\\\\\\\\。\\\\\\\\\\\\\\\\.\\\\\\\\\\\\\\\\「\\\\\\\\\\\\\\\\」\\\\\\\\\\\\\\\\…\\\\\\\\\\\\\\\\?\\\\\\\\\\\\\\\\・]'
85
 
86
  # load data, processor and model
87
  test_dataset = load_dataset("common_voice", "ja", split="test")