patrickvonplaten committed on
Commit
a57b9fd
1 Parent(s): 246a60f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -5
README.md CHANGED
@@ -81,10 +81,6 @@ print("Reference:", test_dataset["sentence"][:2])
81
  The model can be evaluated as follows on the Japanese test data of Common Voice.
82
 
83
  ```python
84
- !pip install mecab-python3
85
- !pip install unidic-lite
86
- !python -m unidic download
87
-
88
  import torch
89
  import torchaudio
90
  from datasets import load_dataset, load_metric
@@ -98,7 +94,7 @@ processor = Wav2Vec2Processor.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
98
  model = Wav2Vec2ForCTC.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
99
  model.to("cuda")
100
 
101
- chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]' # TODO: adapt this list to include all special characters you removed from the data
102
  # resampler = torchaudio.transforms.Resample(48_000, 16_000) # JSUT is already 16kHz
103
  resampler = torchaudio.transforms.Resample(16_000, 16_000) # JSUT is already 16kHz
104
 
 
81
  The model can be evaluated as follows on the Japanese test data of Common Voice.
82
 
83
  ```python
 
 
 
 
84
  import torch
85
  import torchaudio
86
  from datasets import load_dataset, load_metric
 
94
  model = Wav2Vec2ForCTC.from_pretrained("qqhann/w2v_hf_jsut_xlsr53")
95
  model.to("cuda")
96
 
97
+ chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“]' # TODO: adapt this list to include all special characters you removed from the data
98
  # resampler = torchaudio.transforms.Resample(48_000, 16_000) # JSUT is already 16kHz
99
  resampler = torchaudio.transforms.Resample(16_000, 16_000) # JSUT is already 16kHz
100