update the model: add "-" to the prediction characters

Browse files

Files changed (4) hide show

README.md +4 -25
pytorch_model.bin +2 -2
pytorch_model_v2.bin +0 -3
vocab.json +1 -1

README.md CHANGED Viewed

@@ -44,8 +44,7 @@ ds = load_dataset("common_voice", "fr", split="test", cache_dir="./data/fr")
-chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�\\\\‘\\\\’\\\\’\\\\’\\\\‘\\\\…\\\\·\\\\!\\\\ǃ\\\\?\\\\«\\\\‹\\\\»\\\\›“\\\\”\\\\\\\\ʿ\\\\ʾ\\\\„\\\\∞\\\\\\\\|\\\\.\\\\,\\\\;\\\\:\\\\*\\\\—\\\\–\\\\─\\\\―\\\\_\\\\/\\\\:\\\\ː\\\\;\\\\,\\\\=\\\\«\\\\»\\\\→]'
 def map_to_array(batch):
     speech, _ = torchaudio.load(batch["path"])
     batch["speech"] = resampler.forward(speech.squeeze(0)).numpy()
@@ -72,28 +71,8 @@ wer = load_metric("wer")
 print(wer.compute(predictions=result["predicted"], references=result["target"]))
 ```
-## Training
-6% of the Common Voice `train`, `validation` datasets (20K files) were used for training.
-## Testing
-All the Common Voice `Test` dataset (15763 files) were used for testing.
-Results:
-WER=20.89%
-SER=77.56%
-## New Model (v2)
-~10% of the Common Voice `train`, `validation` datasets (30K files) were used for training.
-Results:
-WER=18.81%
-SER=73.82%

+chars_to_ignore_regex = '[\,\?\.\!\;\:\"\“\%\‘\”\�\‘\’\’\’\‘\…\·\!\ǃ\?\«\‹\»\›“\”\\ʿ\ʾ\„\∞\\|\.\,\;\:\*\—\–\─\―\_\/\:\ː\;\,\=\«\»\→]'
 def map_to_array(batch):
     speech, _ = torchaudio.load(batch["path"])
     batch["speech"] = resampler.forward(speech.squeeze(0)).numpy()
 print(wer.compute(predictions=result["predicted"], references=result["target"]))
 ```
+## Results
+WER=18.29%
+SER=71.44%

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9d5519b860f0eb5200238e304f15f7834fb79be18367eea0138f43f7b67ac495
-size 1262097815

 version https://git-lfs.github.com/spec/v1
+oid sha256:44440692ceb1c3d778d0b89ec5b662f6485d0c8f51dea99173935e8e616f4bc6
+size 1262101911

pytorch_model_v2.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:58735873545095dd226e8e8064f2b5bcd09bf29f9de74175a7b0cc7b7d777494
-size 1262097815

vocab.json CHANGED Viewed

@@ -1 +1 @@

- {"a": 0, "e": 1, "i": 2, "o": 3, "u": 4, "y": 5, "b": 6, "c": 7, "d": 8, "f": 9, "g": 10, "h": 11, "j": 12, "k": 13, "l": 14, "m": 15, "n": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "v": 22, "w": 23, "x": 24, "z": 25, "\u00e0": 26, "\u00e2": 27, "\u00e7": 28, "\u00e8": 29, "\u00e9": 30, "\u00ea": 31, "\u00ee": 32, "\u00f4": 33, "\u00f9": 34, "\u00fb": 35, "|": 36, "'": 37, "<unk>": 38, "<pad>": 39}


+ {"a": 0, "e": 1, "i": 2, "o": 3, "u": 4, "y": 5, "b": 6, "c": 7, "d": 8, "f": 9, "g": 10, "h": 11, "j": 12, "k": 13, "l": 14, "m": 15, "n": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "v": 22, "w": 23, "x": 24, "z": 25, "à": 26, "â": 27, "ç": 28, "è": 29, "é": 30, "ê": 31, "î": 32, "ô": 33, "ù": 34, "û": 35, "|": 36, "'": 37, "-": 38, "<unk>": 39, "<pad>": 40}