ayameRushia
committed on
Commit
•
3351ad0
1
Parent(s):
0e1035d
Update README.md
Browse files
README.md
CHANGED
@@ -38,17 +38,29 @@ It achieves the following results on the evaluation set:
|
|
38 |
|
39 |
## Model description
|
40 |
|
41 |
-
|
42 |
|
43 |
## Intended uses & limitations
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
## Training and evaluation data
|
48 |
-
|
49 |
-
More information needed
|
50 |
|
51 |
## Training procedure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
### Training hyperparameters
|
54 |
|
|
|
38 |
|
39 |
## Model description
|
40 |
|
41 |
+
Fine-tuned from openai/whisper-v3-turbo.
|
42 |
|
43 |
## Intended uses & limitations
|
44 |
|
45 |
+
This model was trained only on Common Voice version 17.
|
|
|
|
|
|
|
|
|
46 |
|
47 |
## Training procedure
|
48 |
+
The data was preprocessed as follows:
|
49 |
+
```
|
50 |
+
import re
|
51 |
+
|
52 |
+
chars_to_ignore_regex = '[\,\?\.\!\;\:\"\”\’\'\“\(\)\[\\\\&/!\‘]' # delete following chars
|
53 |
+
chars_to_space_regex = '[\–\—\-]' # replace the following chars into space
|
54 |
+
|
55 |
+
def remove_special_characters(batch):
|
56 |
+
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
|
57 |
+
batch["sentence"] = re.sub(chars_to_space_regex, ' ', batch["sentence"]) + " "
|
58 |
+
# replacing some character
|
59 |
+
batch["sentence"] = batch["sentence"].replace("é", "e").replace("á", "a").replace("ł", "l").replace("ń", "n").replace("ō", "o").strip()
|
60 |
+
return batch
|
61 |
+
|
62 |
+
common_voice = common_voice.map(remove_special_characters)
|
63 |
+
```
|
64 |
|
65 |
### Training hyperparameters
|
66 |
|