ydshieh HF staff committed on
Commit
f4a9497
1 Parent(s): c2b028f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -56,7 +56,7 @@ def speech_file_to_array_fn(batch):
56
  return batch
57
 
58
  test_dataset = test_dataset.map(speech_file_to_array_fn)
59
- inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
60
 
61
  with torch.no_grad():
62
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
@@ -64,7 +64,7 @@ with torch.no_grad():
64
  predicted_ids = torch.argmax(logits, dim=-1)
65
 
66
  print("Prediction:", processor.batch_decode(predicted_ids))
67
- print("Reference:", test_dataset["sentence"][:2])
68
  ```
69
 
70
 
@@ -114,7 +114,7 @@ processor = Wav2Vec2Processor.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-ch
114
  model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
115
  model.to("cuda")
116
 
117
- chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:"\\“\\%\\‘\\”\\�\\.\\⋯\\!\\-\\:\\–\\。\\》\\,\\)\\,\\?\\;\\~\\~\\…\\︰\\,\\(\\」\\‧\\《\\﹔\\、\\—\\/\\,\\「\\﹖\\·\\×\\̃\\̌\\ε\\λ\\μ\\и\\т\\─\\□\\〈\\〉\\『\\』\\ア\\オ\\カ\\チ\\ド\\ベ\\ャ\\ヤ\\ン\\・\\丶\\a\\b\\f\\g\\i\\n\\p\\t' + "\\']"
118
 
119
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
120
 
 
56
  return batch
57
 
58
  test_dataset = test_dataset.map(speech_file_to_array_fn)
59
+ inputs = processor(test_dataset[:2]["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
60
 
61
  with torch.no_grad():
62
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 
64
  predicted_ids = torch.argmax(logits, dim=-1)
65
 
66
  print("Prediction:", processor.batch_decode(predicted_ids))
67
+ print("Reference:", test_dataset[:2]["sentence"])
68
  ```
69
 
70
 
 
114
  model = Wav2Vec2ForCTC.from_pretrained("ydshieh/wav2vec2-large-xlsr-53-chinese-zh-cn-gpt")
115
  model.to("cuda")
116
 
117
+ chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\.\\\\⋯\\\\!\\\\-\\\\:\\\\–\\\\。\\\\》\\\\,\\\\)\\\\,\\\\?\\\\;\\\\~\\\\~\\\\…\\\\︰\\\\,\\\\(\\\\」\\\\‧\\\\《\\\\﹔\\\\、\\\\—\\\\/\\\\,\\\\「\\\\﹖\\\\·\\\\×\\\\̃\\\\̌\\\\ε\\\\λ\\\\μ\\\\и\\\\т\\\\─\\\\□\\\\〈\\\\〉\\\\『\\\\』\\\\ア\\\\オ\\\\カ\\\\チ\\\\ド\\\\ベ\\\\ャ\\\\ヤ\\\\ン\\\\・\\\\丶\\\\a\\\\b\\\\f\\\\g\\\\i\\\\n\\\\p\\\\t' + "\\\\']"
118
 
119
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
120