added eval.sh and eval results; added robust speech event tag

Browse files

Files changed (9) hide show

.gitignore +2 -1
.ipynb_checkpoints/README-checkpoint.md +78 -0
.ipynb_checkpoints/eval-checkpoint.sh +8 -0
.ipynb_checkpoints/run-checkpoint.sh +1 -2
.ipynb_checkpoints/speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results-checkpoint.txt +2 -0
README.md +2 -0
eval.sh +8 -0
run.sh +1 -2
speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results.txt +2 -0

.gitignore CHANGED Viewed

	@@ -1 +1,2 @@
1	- checkpoint-*/


1	+ checkpoint-*/
2	+ log*

.ipynb_checkpoints/README-checkpoint.md ADDED Viewed

	@@ -0,0 +1,78 @@

+---
+language:
+- zh-HK
+license: apache-2.0
+tags:
+- automatic-speech-recognition
+- mozilla-foundation/common_voice_8_0
+- generated_from_trainer
+- zh-HK
+- robust-speech-event
+datasets:
+- common_voice
+model-index:
+- name: ''
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+#
+This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - ZH-HK dataset.
+It achieves the following results on the evaluation set:
+- Loss: 2.6726
+- Wer: 0.9815
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0003
+- train_batch_size: 32
+- eval_batch_size: 16
+- seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 64
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 10.0
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Wer    |
+|:-------------:|:-----:|:----:|:---------------:|:------:|
+| No log        | 1.0   | 183  | 47.8442         | 1.0    |
+| No log        | 2.0   | 366  | 6.3109          | 1.0    |
+| 41.8902       | 3.0   | 549  | 6.2392          | 1.0    |
+| 41.8902       | 4.0   | 732  | 5.9739          | 1.1123 |
+| 41.8902       | 5.0   | 915  | 4.9014          | 1.9474 |
+| 5.5817        | 6.0   | 1098 | 3.9892          | 1.0188 |
+| 5.5817        | 7.0   | 1281 | 3.5080          | 1.0104 |
+| 5.5817        | 8.0   | 1464 | 3.0797          | 0.9905 |
+| 3.5579        | 9.0   | 1647 | 2.8111          | 0.9836 |
+| 3.5579        | 10.0  | 1830 | 2.6726          | 0.9815 |
+### Framework versions
+- Transformers 4.17.0.dev0
+- Pytorch 1.10.2+cu102
+- Datasets 1.18.3
+- Tokenizers 0.11.0

.ipynb_checkpoints/eval-checkpoint.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+python eval.py \
+--model_id="ivanlau/wav2vec2-large-xls-r-300m-cantonese" \
+--dataset="speech-recognition-community-v2/dev_data" \
+--config="zh-HK" \
+--split="validation" \
+--chunk_length_s="5.0" \
+--stride_length_s="1.0" \
+--log_outputs \

.ipynb_checkpoints/run-checkpoint.sh CHANGED Viewed

@@ -4,8 +4,7 @@ python run_speech_recognition_ctc.py \
 	--dataset_config_name="zh-HK" \
 	--output_dir="./" \
     --cache_dir="../container_0" \
-	--overwrite_output_dir \
-	--num_train_epochs="10" \
 	--per_device_train_batch_size="32" \
     --per_device_eval_batch_size="16" \
 	--gradient_accumulation_steps="2" \

 	--dataset_config_name="zh-HK" \
 	--output_dir="./" \
     --cache_dir="../container_0" \
+	--num_train_epochs="90" \
 	--per_device_train_batch_size="32" \
     --per_device_eval_batch_size="16" \
 	--gradient_accumulation_steps="2" \

.ipynb_checkpoints/speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results-checkpoint.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ WER: 1.0
2	+ CER: 0.7386630836412496

README.md CHANGED Viewed

@@ -6,6 +6,8 @@ tags:
 - automatic-speech-recognition
 - mozilla-foundation/common_voice_8_0
 - generated_from_trainer
 datasets:
 - common_voice
 model-index:

 - automatic-speech-recognition
 - mozilla-foundation/common_voice_8_0
 - generated_from_trainer
+- zh-HK
+- robust-speech-event
 datasets:
 - common_voice
 model-index:

eval.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+python eval.py \
+--model_id="ivanlau/wav2vec2-large-xls-r-300m-cantonese" \
+--dataset="speech-recognition-community-v2/dev_data" \
+--config="zh-HK" \
+--split="validation" \
+--chunk_length_s="5.0" \
+--stride_length_s="1.0" \
+--log_outputs \

run.sh CHANGED Viewed

@@ -4,8 +4,7 @@ python run_speech_recognition_ctc.py \
 	--dataset_config_name="zh-HK" \
 	--output_dir="./" \
     --cache_dir="../container_0" \
-	--overwrite_output_dir \
-	--num_train_epochs="10" \
 	--per_device_train_batch_size="32" \
     --per_device_eval_batch_size="16" \
 	--gradient_accumulation_steps="2" \

 	--dataset_config_name="zh-HK" \
 	--output_dir="./" \
     --cache_dir="../container_0" \
+	--num_train_epochs="90" \
 	--per_device_train_batch_size="32" \
     --per_device_eval_batch_size="16" \
 	--gradient_accumulation_steps="2" \

speech-recognition-community-v2_dev_data_zh-HK_validation_eval_results.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ WER: 1.0
2	+ CER: 0.7386630836412496