Upload 9 files

Browse files

Files changed (9) hide show

README.md +18 -0
data/es_token_list/bpe_unigram64/bpe.model +3 -0
data/es_token_list/bpe_unigram64/tokens.txt +64 -0
exp/asr_stats_raw_es_bpe64_sp/train/feats_stats.npz +3 -0
exp/asr_train_asr_transformer_raw_es_bpe64_sp/config.yaml +258 -0
exp/asr_train_asr_transformer_raw_es_bpe64_sp/valid.acc.ave_10best.pth +3 -0
exp/lm_train_lm_es_bpe64/20epoch.pth +3 -0
exp/lm_train_lm_es_bpe64/config.yaml +194 -0
meta.yaml +9 -0

README.md ADDED Viewed

	@@ -0,0 +1,18 @@

+---
+license: apache-2.0
+language:
+- es
+library_name: espnet
+tags:
+- automatic-speech-recognition
+- speech
+- espnet
+- spanish
+---
+# reazonspeech-espnet-v1
+`reazonspeech-espnet-v1` es un modelo de reconocimiento automático del habla (ASR) para el español ecuatoriano, entrenado con ESPnet2. Su objetivo es reconocer el habla de diferentes regiones y acentos del Ecuador; para el entrenamiento se usaron un corpus propio y el corpus de Common Voice. El modelo usa una arquitectura Transformer con tokenización por subpalabras (BPE, vocabulario de 64 tokens). El modelo alcanza un WER de X% y un MOS de Y en el conjunto de datos de prueba. Para más detalles sobre el modelo, puedes consultar este artículo.

data/es_token_list/bpe_unigram64/bpe.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ebfb4bda4d1ca304ed4cb00bb4fc00c861634ea02f912bb79f8431443c33e4f
+size 238394

data/es_token_list/bpe_unigram64/tokens.txt ADDED Viewed

	@@ -0,0 +1,64 @@

+<blank>
+<unk>
+▁
+a
+o
+i
+s
+r
+e
+n
+c
+u
+l
+m
+b
+g
+t
+▁de
+▁a
+en
+do
+er
+▁p
+ra
+ta
+te
+h
+▁que
+p
+▁la
+▁el
+▁es
+to
+d
+da
+es
+▁no
+os
+y
+▁y
+▁ma
+▁un
+▁se
+▁en
+la
+f
+z
+ñ
+0
+4
+3
+w
+6
+8
+9
+2
+1
+x
+j
+q
+5
+7
+v
+<sos/eos>

exp/asr_stats_raw_es_bpe64_sp/train/feats_stats.npz ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92894c3672055bd42b228eafdcca0cc989649de0d4fdbe3f4e163d82c22b70f1
+size 1402

exp/asr_train_asr_transformer_raw_es_bpe64_sp/config.yaml ADDED Viewed

	@@ -0,0 +1,258 @@

+config: conf/train_asr_transformer.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp/asr_train_asr_transformer_raw_es_bpe64_sp
+ngpu: 0
+seed: 0
+num_workers: 1
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: null
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 20
+patience: null
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+-   - valid
+    - acc
+    - max
+keep_nbest_models: 10
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_lora: false
+save_lora_only: true
+lora_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 16
+valid_batch_size: null
+batch_bins: 1000000
+valid_batch_bins: null
+train_shape_file:
+- exp/asr_stats_raw_es_bpe64_sp/train/speech_shape
+- exp/asr_stats_raw_es_bpe64_sp/train/text_shape.bpe
+valid_shape_file:
+- exp/asr_stats_raw_es_bpe64_sp/valid/speech_shape
+- exp/asr_stats_raw_es_bpe64_sp/valid/text_shape.bpe
+batch_type: folded
+valid_batch_type: null
+fold_length:
+- 80000
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+train_data_path_and_name_and_type:
+-   - dump/raw/train_nodev_sp/wav.scp
+    - speech
+    - sound
+-   - dump/raw/train_nodev_sp/text
+    - text
+    - text
+valid_data_path_and_name_and_type:
+-   - dump/raw/train_dev/wav.scp
+    - speech
+    - sound
+-   - dump/raw/train_dev/text
+    - text
+    - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+    lr: 0.001
+scheduler: warmuplr
+scheduler_conf:
+    warmup_steps: 2500
+token_list:
+- <blank>
+- <unk>
+- ▁
+- a
+- o
+- i
+- s
+- r
+- e
+- n
+- c
+- u
+- l
+- m
+- b
+- g
+- t
+- ▁de
+- ▁a
+- en
+- do
+- er
+- ▁p
+- ra
+- ta
+- te
+- h
+- ▁que
+- p
+- ▁la
+- ▁el
+- ▁es
+- to
+- d
+- da
+- es
+- ▁no
+- os
+- y
+- ▁y
+- ▁ma
+- ▁un
+- ▁se
+- ▁en
+- la
+- f
+- z
+- ñ
+- '0'
+- '4'
+- '3'
+- w
+- '6'
+- '8'
+- '9'
+- '2'
+- '1'
+- x
+- j
+- q
+- '5'
+- '7'
+- v
+- <sos/eos>
+init: xavier_uniform
+input_size: null
+ctc_conf:
+    dropout_rate: 0.0
+    ctc_type: builtin
+    reduce: true
+    ignore_nan_grad: null
+    zero_infinity: true
+    brctc_risk_strategy: exp
+    brctc_group_strategy: end
+    brctc_risk_factor: 0.0
+joint_net_conf: null
+use_preprocessor: true
+use_lang_prompt: false
+use_nlp_prompt: false
+token_type: bpe
+bpemodel: data/es_token_list/bpe_unigram64/bpe.model
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+speech_volume_normalize: null
+rir_scp: null
+rir_apply_prob: 1.0
+noise_scp: null
+noise_apply_prob: 1.0
+noise_db_range: '13_15'
+short_noise_thres: 0.5
+aux_ctc_tasks: []
+frontend: default
+frontend_conf:
+    fs: 16k
+specaug: null
+specaug_conf: {}
+normalize: global_mvn
+normalize_conf:
+    stats_file: exp/asr_stats_raw_es_bpe64_sp/train/feats_stats.npz
+model: espnet
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1
+    length_normalized_loss: false
+preencoder: null
+preencoder_conf: {}
+encoder: transformer
+encoder_conf:
+    output_size: 256
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 12
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d
+    normalize_before: true
+postencoder: null
+postencoder_conf: {}
+decoder: transformer
+decoder_conf:
+    attention_heads: 4
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+preprocessor: default
+preprocessor_conf: {}
+required:
+- output_dir
+- token_list
+version: '202402'
+distributed: false

exp/asr_train_asr_transformer_raw_es_bpe64_sp/valid.acc.ave_10best.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00ea407c6f54f0f1b6e22906dd350afb4f6197e525033a33b35045595dcb2288
+size 108783461

exp/lm_train_lm_es_bpe64/20epoch.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ed654cc6a9a7a28311d10605d10e3575f6b1a4928ccdc598e958d915241e89e
+size 27417796

exp/lm_train_lm_es_bpe64/config.yaml ADDED Viewed

	@@ -0,0 +1,194 @@

+config: conf/train_lm.yaml
+print_config: false
+log_level: INFO
+drop_last_iter: false
+dry_run: false
+iterator_type: sequence
+valid_iterator_type: null
+output_dir: exp/lm_train_lm_es_bpe64
+ngpu: 0
+seed: 0
+num_workers: 1
+num_att_plot: 3
+dist_backend: nccl
+dist_init_method: env://
+dist_world_size: null
+dist_rank: null
+local_rank: null
+dist_master_addr: null
+dist_master_port: null
+dist_launcher: null
+multiprocessing_distributed: false
+unused_parameters: false
+sharded_ddp: false
+cudnn_enabled: true
+cudnn_benchmark: false
+cudnn_deterministic: true
+collect_stats: false
+write_collected_feats: false
+max_epoch: 20
+patience: null
+val_scheduler_criterion:
+- valid
+- loss
+early_stopping_criterion:
+- valid
+- loss
+- min
+best_model_criterion:
+-   - valid
+    - loss
+    - min
+keep_nbest_models: 1
+nbest_averaging_interval: 0
+grad_clip: 5.0
+grad_clip_type: 2.0
+grad_noise: false
+accum_grad: 1
+no_forward_run: false
+resume: true
+train_dtype: float32
+use_amp: false
+log_interval: null
+use_matplotlib: true
+use_tensorboard: true
+create_graph_in_tensorboard: false
+use_wandb: false
+wandb_project: null
+wandb_id: null
+wandb_entity: null
+wandb_name: null
+wandb_model_log_interval: -1
+detect_anomaly: false
+use_lora: false
+save_lora_only: true
+lora_conf: {}
+pretrain_path: null
+init_param: []
+ignore_init_mismatch: false
+freeze_param: []
+num_iters_per_epoch: null
+batch_size: 64
+valid_batch_size: null
+batch_bins: 1000000
+valid_batch_bins: null
+train_shape_file:
+- exp/lm_stats_es_bpe64/train/text_shape.bpe
+valid_shape_file:
+- exp/lm_stats_es_bpe64/valid/text_shape.bpe
+batch_type: folded
+valid_batch_type: null
+fold_length:
+- 150
+sort_in_batch: descending
+shuffle_within_batch: false
+sort_batch: descending
+multiple_iterator: false
+chunk_length: 500
+chunk_shift_ratio: 0.5
+num_cache_chunks: 1024
+chunk_excluded_key_prefixes: []
+chunk_default_fs: null
+train_data_path_and_name_and_type:
+-   - dump/raw/lm_train.txt
+    - text
+    - text
+valid_data_path_and_name_and_type:
+-   - dump/raw/org/train_dev/text
+    - text
+    - text
+allow_variable_data_keys: false
+max_cache_size: 0.0
+max_cache_fd: 32
+allow_multi_rates: false
+valid_max_cache_size: null
+exclude_weight_decay: false
+exclude_weight_decay_conf: {}
+optim: adam
+optim_conf:
+    lr: 0.1
+scheduler: null
+scheduler_conf: {}
+token_list:
+- <blank>
+- <unk>
+- ▁
+- a
+- o
+- i
+- s
+- r
+- e
+- n
+- c
+- u
+- l
+- m
+- b
+- g
+- t
+- ▁de
+- ▁a
+- en
+- do
+- er
+- ▁p
+- ra
+- ta
+- te
+- h
+- ▁que
+- p
+- ▁la
+- ▁el
+- ▁es
+- to
+- d
+- da
+- es
+- ▁no
+- os
+- y
+- ▁y
+- ▁ma
+- ▁un
+- ▁se
+- ▁en
+- la
+- f
+- z
+- ñ
+- '0'
+- '4'
+- '3'
+- w
+- '6'
+- '8'
+- '9'
+- '2'
+- '1'
+- x
+- j
+- q
+- '5'
+- '7'
+- v
+- <sos/eos>
+init: null
+use_preprocessor: true
+token_type: bpe
+bpemodel: data/es_token_list/bpe_unigram64/bpe.model
+non_linguistic_symbols: null
+cleaner: null
+g2p: null
+lm: seq_rnn
+lm_conf:
+    unit: 650
+    nlayers: 2
+model: lm
+model_conf: {}
+required:
+- output_dir
+- token_list
+version: '202402'
+distributed: false

meta.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+espnet: '202310'
+files:
+  asr_model_file: exp/asr_train_asr_transformer_raw_es_bpe64_sp/valid.acc.ave_10best.pth
+  lm_file: exp/lm_train_lm_es_bpe64/20epoch.pth
+python: "3.8"
+pytorch: 1.12.1
+yaml_files:
+  asr_train_config: exp/asr_train_asr_transformer_raw_es_bpe64_sp/config.yaml
+  lm_train_config: exp/lm_train_lm_es_bpe64/config.yaml