{
"dataset_name": "mozilla-foundation/common_voice_13_0",
"model_name_or_path": "facebook/wav2vec2-large-xlsr-53",
"dataset_config_name": "eo",
"output_dir": "./wav2vec2-common_voice_13_0-eo-10",
"train_split_name": "train",
"eval_split_name": "validation",
"eval_metrics": ["cer", "wer"],
"overwrite_output_dir": true,
"preprocessing_num_workers": 1,
"num_train_epochs": 5,
"per_device_train_batch_size": 16,
"gradient_accumulation_steps": 2,
"gradient_checkpointing": true,
"learning_rate": 3e-5,
"warmup_steps": 500,
"evaluation_strategy": "steps",
"text_column_name": "sentence",
"length_column_name": "input_length",
"save_steps": 1000,
"eval_steps": 1000,
"layerdrop": 0.2,
"save_total_limit": 3,
"freeze_feature_encoder": true,
"chars_to_ignore": "-!\"'(),.:;=?_`¨«¸»ʼ‑–—‘’“”„…‹›♫?",
"chars_to_substitute": {
"przy": "pŝe",
"byn": "bin",
"cx": "ĉ",
"sx": "ŝ",
"\ufb01": "fi",
"\ufb02": "fl",
"ǔ": "ŭ",
"ñ": "nj",
"á": "a",
"é": "e",
"ü": "ŭ",
"y": "j",
"qu": "ku"
},
"fp16": true,
"group_by_length": true,
"push_to_hub": true,
"do_train": true,
"do_eval": true
}