csikasote committed
Commit: 00fca47
1 Parent(s): 08105eb

Update README.md

Files changed (1):
  1. README.md +7 -7
README.md CHANGED

@@ -40,18 +40,19 @@ import torchaudio
 from datasets import load_dataset
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 
-test_dataset = load_dataset("common_voice", "{lang_id}", split="test[:2%]")
+test_dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]  # Adapt the path to test.csv
 
-processor = Wav2Vec2Processor.from_pretrained("{model_id}")
-model = Wav2Vec2ForCTC.from_pretrained("{model_id}")
+processor = Wav2Vec2Processor.from_pretrained("csikasote/wav2vec2-large-xlsr-bemba")
+model = Wav2Vec2ForCTC.from_pretrained("csikasote/wav2vec2-large-xlsr-bemba")
 
-resampler = torchaudio.transforms.Resample(48_000, 16_000)
+# BembaSpeech is sampled at 16 kHz, so there is no need to resample
+#resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
     speech_array, sampling_rate = torchaudio.load(batch["path"])
-    batch["speech"] = resampler(speech_array).squeeze().numpy()
+    batch["speech"] = speech_array.squeeze().numpy()
     return batch
 
 test_dataset = test_dataset.map(speech_file_to_array_fn)
@@ -66,7 +67,6 @@ print("Prediction:", processor.batch_decode(predicted_ids))
 print("Reference:", test_dataset["sentence"][:2])
 ```
 
-
 ## Evaluation
 
 The model can be evaluated as follows on the Bemba test data of BembaSpeech.
@@ -86,7 +86,7 @@ processor = Wav2Vec2Processor.from_pretrained("csikasote/wav2vec2-large-xlsr-bem
 model = Wav2Vec2ForCTC.from_pretrained("csikasote/wav2vec2-large-xlsr-bemba")
 model.to("cuda")
 
-chars_to_ignore_regex = '[\,\?\.\!\;\:\"\“]'
+chars_to_ignore_regex = '[\\,\\?\\.\\!\\;\\:\\"\\“]'
 #resampler = torchaudio.transforms.Resample(48_000, 16_000)
 
 # Preprocessing the datasets.
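
For reference, the usage snippet as it reads after this commit, assembled from the first hunk above. The inference lines between `test_dataset.map(...)` and the final `print` calls are not part of this diff; the version below follows the common wav2vec2/XLSR usage template and should be read as a sketch of the surrounding README, not as part of the change itself.

```python
import torch
import torchaudio
from datasets import load_dataset
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# Adapt the path to test.csv (tab-separated, with "path" and "sentence" columns)
test_dataset = load_dataset("csv", data_files={"test": "/content/test.csv"}, delimiter="\t")["test"]

processor = Wav2Vec2Processor.from_pretrained("csikasote/wav2vec2-large-xlsr-bemba")
model = Wav2Vec2ForCTC.from_pretrained("csikasote/wav2vec2-large-xlsr-bemba")

# BembaSpeech is sampled at 16 kHz, so no resampling is needed
def speech_file_to_array_fn(batch):
    speech_array, sampling_rate = torchaudio.load(batch["path"])
    batch["speech"] = speech_array.squeeze().numpy()
    return batch

test_dataset = test_dataset.map(speech_file_to_array_fn)

# Standard wav2vec2 inference (assumed, not shown in this diff)
inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
with torch.no_grad():
    logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
predicted_ids = torch.argmax(logits, dim=-1)

print("Prediction:", processor.batch_decode(predicted_ids))
print("Reference:", test_dataset["sentence"][:2])
```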
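A note on the `chars_to_ignore_regex` change in the last hunk: the old and new string literals match the same set of characters (inside a character class, `\,` and `,` behave identically), but the doubled backslashes avoid Python's invalid-escape-sequence warning for sequences such as `\,` in a plain string literal. Below is a minimal sketch of how such a pattern is typically applied to the reference transcriptions during evaluation preprocessing; the helper name is illustrative, not taken from the README.

```python
import re

chars_to_ignore_regex = '[\\,\\?\\.\\!\\;\\:\\"\\“]'

def remove_special_characters(batch):
    # Strip the ignored punctuation and lowercase the reference transcription
    batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).lower()
    return batch

print(remove_special_characters({"sentence": "Example, sentence!"})["sentence"])  # -> "example sentence"
```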