Antione committed on
Commit
cfa329c
·
verified ·
1 Parent(s): e6869eb

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +83 -1
README.md CHANGED
@@ -7,4 +7,86 @@ language:
7
  library_name: espnet
8
  tags:
9
  - asr
10
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  library_name: espnet
8
  tags:
9
  - asr
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `espnet/shihlun_asr_whisper_medium_finetuned_chime4`
15
+ This model was trained by Shih-Lun Wu (slseanwu) using the chime4 recipe in [espnet](https://github.com/espnet/espnet/).
16
+
17
+ ### Demo: How to use in ESPnet2
18
+ ```bash
19
+ cd espnet
20
+ pip install -e .
21
+ cd egs2/chime4/asr1
22
+
23
+ train_set=tr05_multi_noisy_si284 # tr05_multi_noisy (original training data) or tr05_multi_noisy_si284 (add si284 data)
24
+ valid_set=dt05_multi_isolated_1ch_track
25
+ test_sets="dt05_real_isolated_1ch_track dt05_simu_isolated_1ch_track et05_real_isolated_1ch_track et05_simu_isolated_1ch_track"
26
+
27
+ asr_tag=whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
28
+ asr_config=conf/tuning/train_asr_whisper_full.yaml
29
+ inference_config=conf/decode_asr_whisper_noctc_greedy.yaml
30
+
31
+
32
+ ./asr.sh \
33
+ --skip_data_prep false \
34
+ --skip_train false \
35
+ --gpu_inference true \
36
+ --ngpu 4 \
37
+ --lang en \
38
+ --token_type whisper_multilingual \
39
+ --feats_normalize "" \
40
+ --stage 11 \
41
+ --use_lm ${use_lm} \
42
+ --use_word_lm ${use_wordlm} \
43
+ --lm_config "${lm_config}" \
44
+ --cleaner whisper_basic \
45
+ --asr_config "${asr_config}" \
46
+ --inference_config "${inference_config}" \
47
+ --train_set "${train_set}" \
48
+ --valid_set "${valid_set}" \
49
+ --test_sets "${test_sets}" \
50
+ --speed_perturb_factors "${speed_perturb_factors}" \
51
+ --asr_speech_fold_length 512 \
52
+ --asr_text_fold_length 150 \
53
+ --lm_fold_length 150 \
54
+ --lm_train_text "data/${train_set}/text" "$@"
55
+ ```
56
+
57
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
58
+ # RESULTS
59
+ ## Environments
60
+ - date: `Tue Jan 10 04:15:30 CST 2023`
61
+ - python version: `3.9.13 (main, Aug 25 2022, 23:26:10) [GCC 11.2.0]`
62
+ - espnet version: `espnet 202211`
63
+ - pytorch version: `pytorch 1.12.1`
64
+ - Git hash: `d89be931dcc8f61437ac49cbe39a773f2054c50c`
65
+ - Commit date: `Mon Jan 9 11:06:45 2023 -0600`
66
+
67
+ ## whisper_medium_finetune_lr1e-5_adamw_wd1e-2_3epochs
68
+ ### WER
69
+
70
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
71
+ |---|---|---|---|---|---|---|---|---|
72
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|24791|97.8|1.7|0.5|0.3|2.5|24.5|
73
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|24792|96.1|3.0|0.9|0.5|4.4|35.6|
74
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|19341|96.4|2.9|0.7|0.5|4.1|33.0|
75
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|19344|93.4|5.0|1.7|0.8|7.4|41.8|
76
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|24791|97.7|1.8|0.5|0.4|2.8|25.5|
77
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|24792|96.0|3.3|0.8|0.7|4.8|36.0|
78
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|19341|96.1|3.3|0.6|0.7|4.6|34.9|
79
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|19344|92.9|5.8|1.3|1.2|8.3|43.2|
80
+
81
+ ### CER
82
+
83
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
84
+ |---|---|---|---|---|---|---|---|---|
85
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|141889|99.1|0.3|0.5|0.3|1.2|24.5|
86
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|141900|98.2|0.8|1.0|0.5|2.3|35.6|
87
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|110558|98.5|0.7|0.8|0.5|1.9|33.0|
88
+ |decode_asr_whisper_noctc_beam20_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|110572|96.5|1.6|1.9|0.8|4.3|41.8|
89
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_real_isolated_1ch_track|1640|141889|99.1|0.4|0.5|0.5|1.3|25.5|
90
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/dt05_simu_isolated_1ch_track|1640|141900|98.2|0.9|0.9|0.6|2.4|36.0|
91
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_real_isolated_1ch_track|1320|110558|98.4|0.9|0.7|0.6|2.2|34.9|
92
+ |decode_asr_whisper_noctc_greedy_asr_model_valid.acc.ave/et05_simu_isolated_1ch_track|1320|110572|96.3|2.0|1.7|1.2|4.9|43.2|