jonatasgrosman committed on
Commit
a6e5db9
1 Parent(s): 964ddc3

update README

Browse files
Files changed (1) hide show
  1. README.md +0 -3
README.md CHANGED
@@ -101,15 +101,12 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
101
  LANG_ID = "ja"
102
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-japanese"
103
  DEVICE = "cuda"
104
- MAX_SAMPLES = 8000
105
 
106
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
107
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
108
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
109
 
110
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
111
- if len(test_dataset) > MAX_SAMPLES:
112
- test_dataset = test_dataset.select(range(MAX_SAMPLES))
113
 
114
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
115
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
 
101
  LANG_ID = "ja"
102
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-japanese"
103
  DEVICE = "cuda"
 
104
 
105
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
106
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
107
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
108
 
109
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
 
 
110
 
111
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
112
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py