jonatasgrosman committed on
Commit
666ada0
1 Parent(s): 4f81e25

update README

Browse files
Files changed (1) hide show
  1. README.md +0 -3
README.md CHANGED
@@ -102,15 +102,12 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
102
  LANG_ID = "ru"
103
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-russian"
104
  DEVICE = "cuda"
105
- MAX_SAMPLES = 8000
106
 
107
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
108
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
109
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
110
 
111
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
112
- if len(test_dataset) > MAX_SAMPLES:
113
- test_dataset = test_dataset.select(range(MAX_SAMPLES))
114
 
115
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
116
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py
102
  LANG_ID = "ru"
103
  MODEL_ID = "jonatasgrosman/wav2vec2-large-xlsr-53-russian"
104
  DEVICE = "cuda"
 
105
 
106
  CHARS_TO_IGNORE = [",", "?", "¿", ".", "!", "¡", ";", ":", '""', "%", '"', "�", "ʿ", "·", "჻", "~", "՞",
107
  "؟", "،", "।", "॥", "«", "»", "„", "“", "”", "「", "」", "‘", "’", "《", "》", "(", ")", "[", "]",
108
  "=", "`", "_", "+", "<", ">", "…", "–", "°", "´", "ʾ", "‹", "›", "©", "®", "—", "→", "。"]
109
 
110
  test_dataset = load_dataset("common_voice", LANG_ID, split="test")
 
 
111
 
112
  wer = load_metric("wer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/wer.py
113
  cer = load_metric("cer.py") # https://github.com/jonatasgrosman/wav2vec2-sprint/blob/main/cer.py