|
--- |
|
tags: |
|
- espnet |
|
- audio |
|
- automatic-speech-recognition |
|
language: en |
|
datasets: |
|
- lrs3 |
|
license: cc-by-4.0 |
|
--- |
|
|
|
## ESPnet2 AVSR model |
|
|
|
### `espnet/msk_lrs3_train_avsr_avhubert_large_extracted_en_bpe1000` |
|
|
|
This model was trained by ms-dot-k using the lrs3 recipe in [espnet](https://github.com/espnet/espnet/). |
|
|
|
### Demo: How to use in ESPnet2 |
|
|
|
Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html) if you haven't done that already. |
|
|
|
```bash |
|
cd espnet |
|
pip install -e . |
|
cd egs2/lrs3/avsr1 |
|
./run.sh --skip_data_prep false --skip_train true --download_model espnet/msk_lrs3_train_avsr_avhubert_large_extracted_en_bpe1000 |
|
``` |
|
|
|
<!-- Generated by scripts/utils/show_asr_result.sh --> |
|
# RESULTS |
|
## Environments |
|
- date: `Thu Sep 28 23:59:06 KST 2023` |
|
- python version: `3.8.18 (default, Sep 11 2023, 13:40:15) [GCC 11.2.0]` |
|
- espnet version: `espnet 202308` |
|
- pytorch version: `pytorch 1.12.0` |
|
- Git hash: `5d0758e2a7063b82d1f10a8ac2de98eb6cf8a352` |
|
- Commit date: `Wed Aug 30 18:03:42 2023 -0400` |
|
|
|
## exp/asr_train_avsr_avhubert_large_extracted_en_bpe1000 |
|
### WER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|inference_asr_model_valid.acc.ave/test|1321|9890|98.5|1.1|0.4|0.2|1.7|8.8| |
|
|
|
### CER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|inference_asr_model_valid.acc.ave/test|1321|49750|99.4|0.2|0.4|0.2|0.8|8.8| |
|
|
|
### TER |
|
|
|
|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err| |
|
|---|---|---|---|---|---|---|---|---| |
|
|inference_asr_model_valid.acc.ave/test|1321|14940|98.8|0.8|0.4|0.3|1.5|8.8| |
|
|
|
|
|
|
|
## ASR config |
|
|
|
<details><summary>expand</summary> |
|
|
|
``` |
|
config: conf/train_avsr_avhubert_large.yaml |
|
print_config: false |
|
log_level: INFO |
|
drop_last_iter: false |
|
dry_run: false |
|
iterator_type: sequence |
|
valid_iterator_type: null |
|
output_dir: exp/asr_train_avsr_avhubert_large_extracted_en_bpe1000 |
|
ngpu: 1 |
|
seed: 0 |
|
num_workers: 1 |
|
num_att_plot: 3 |
|
dist_backend: nccl |
|
dist_init_method: env:// |
|
dist_world_size: 4 |
|
dist_rank: 0 |
|
local_rank: 0 |
|
dist_master_addr: localhost |
|
dist_master_port: 54927 |
|
dist_launcher: null |
|
multiprocessing_distributed: true |
|
unused_parameters: true |
|
sharded_ddp: false |
|
cudnn_enabled: true |
|
cudnn_benchmark: false |
|
cudnn_deterministic: true |
|
collect_stats: false |
|
write_collected_feats: false |
|
max_epoch: 20 |
|
patience: null |
|
val_scheduler_criterion: |
|
- valid |
|
- loss |
|
early_stopping_criterion: |
|
- valid |
|
- loss |
|
- min |
|
best_model_criterion: |
|
- - valid |
|
- acc |
|
- max |
|
keep_nbest_models: 10 |
|
nbest_averaging_interval: 0 |
|
grad_clip: 5.0 |
|
grad_clip_type: 2.0 |
|
grad_noise: false |
|
accum_grad: 1 |
|
no_forward_run: false |
|
resume: true |
|
train_dtype: float32 |
|
use_amp: false |
|
log_interval: null |
|
use_matplotlib: true |
|
use_tensorboard: true |
|
create_graph_in_tensorboard: false |
|
use_wandb: false |
|
wandb_project: null |
|
wandb_id: null |
|
wandb_entity: null |
|
wandb_name: null |
|
wandb_model_log_interval: -1 |
|
detect_anomaly: false |
|
pretrain_path: null |
|
init_param: [] |
|
ignore_init_mismatch: false |
|
freeze_param: [] |
|
num_iters_per_epoch: null |
|
batch_size: 16 |
|
valid_batch_size: null |
|
batch_bins: 1000000 |
|
valid_batch_bins: null |
|
train_shape_file: |
|
- exp/asr_stats_extracted_en_bpe1000/train/speech_shape |
|
- exp/asr_stats_extracted_en_bpe1000/train/text_shape.bpe |
|
valid_shape_file: |
|
- exp/asr_stats_extracted_en_bpe1000/valid/speech_shape |
|
- exp/asr_stats_extracted_en_bpe1000/valid/text_shape.bpe |
|
batch_type: folded |
|
valid_batch_type: null |
|
fold_length: |
|
- 800 |
|
- 150 |
|
sort_in_batch: descending |
|
shuffle_within_batch: false |
|
sort_batch: descending |
|
multiple_iterator: false |
|
chunk_length: 500 |
|
chunk_shift_ratio: 0.5 |
|
num_cache_chunks: 1024 |
|
chunk_excluded_key_prefixes: [] |
|
train_data_path_and_name_and_type: |
|
- - dump/extracted/train/feats.scp |
|
- speech |
|
- kaldi_ark |
|
- - dump/extracted/train/text |
|
- text |
|
- text |
|
valid_data_path_and_name_and_type: |
|
- - dump/extracted/val/feats.scp |
|
- speech |
|
- kaldi_ark |
|
- - dump/extracted/val/text |
|
- text |
|
- text |
|
allow_variable_data_keys: false |
|
max_cache_size: 0.0 |
|
max_cache_fd: 32 |
|
valid_max_cache_size: null |
|
exclude_weight_decay: false |
|
exclude_weight_decay_conf: {} |
|
optim: adam |
|
optim_conf: |
|
lr: 0.0003 |
|
scheduler: warmuplr |
|
scheduler_conf: |
|
warmup_steps: 8000 |
|
token_list: |
|
- <blank> |
|
- <unk> |
|
- S |
|
- ▁THE |
|
- ▁TO |
|
- ▁A |
|
- ▁AND |
|
- T |
|
- ▁I |
|
- '''' |
|
- ▁OF |
|
- ▁THAT |
|
- ▁IN |
|
- ING |
|
- D |
|
- ▁YOU |
|
- ▁WE |
|
- E |
|
- ▁IT |
|
- N |
|
- ED |
|
- ▁IS |
|
- R |
|
- M |
|
- P |
|
- Y |
|
- ▁FOR |
|
- ER |
|
- ▁THIS |
|
- ▁WAS |
|
- RE |
|
- C |
|
- G |
|
- ▁SO |
|
- A |
|
- ▁BE |
|
- ▁THEY |
|
- ▁HAVE |
|
- ▁ARE |
|
- O |
|
- ▁ |
|
- ▁ON |
|
- ▁WITH |
|
- LY |
|
- ▁WHAT |
|
- U |
|
- IN |
|
- AL |
|
- ▁MY |
|
- I |
|
- ▁S |
|
- ▁DO |
|
- B |
|
- ▁RE |
|
- L |
|
- ▁ME |
|
- ▁CAN |
|
- ▁BUT |
|
- LE |
|
- ▁ABOUT |
|
- OR |
|
- ▁NOT |
|
- VE |
|
- F |
|
- AR |
|
- RA |
|
- ▁ALL |
|
- ▁OUR |
|
- ▁PEOPLE |
|
- ▁AT |
|
- ▁C |
|
- ▁AS |
|
- IC |
|
- ▁OR |
|
- ▁LIKE |
|
- W |
|
- LL |
|
- K |
|
- ▁AN |
|
- ▁THERE |
|
- ENT |
|
- ▁ONE |
|
- ES |
|
- ▁HE |
|
- RI |
|
- 'ON' |
|
- ▁P |
|
- ▁IF |
|
- ▁FROM |
|
- ▁JUST |
|
- ▁WHEN |
|
- TH |
|
- ▁YOUR |
|
- ▁US |
|
- CE |
|
- ▁DE |
|
- ION |
|
- IT |
|
- ▁KNOW |
|
- ▁HOW |
|
- ▁T |
|
- ▁BECAUSE |
|
- CH |
|
- V |
|
- ▁OUT |
|
- ▁B |
|
- ▁UP |
|
- ▁E |
|
- ▁F |
|
- TE |
|
- ▁HAD |
|
- ▁CO |
|
- LI |
|
- ▁TIME |
|
- ▁THEIR |
|
- ▁MORE |
|
- UR |
|
- ▁WHO |
|
- ▁GO |
|
- EN |
|
- ▁G |
|
- ATION |
|
- AN |
|
- CK |
|
- TER |
|
- ▁SEE |
|
- ▁WOULD |
|
- ▁THESE |
|
- ▁NO |
|
- ▁THEM |
|
- ▁BY |
|
- ▁THINK |
|
- ▁WERE |
|
- IL |
|
- ATE |
|
- ▁GET |
|
- ▁SE |
|
- ▁VERY |
|
- ▁GOING |
|
- ▁EX |
|
- ▁REALLY |
|
- ITY |
|
- ▁WAY |
|
- ▁CON |
|
- H |
|
- RO |
|
- ▁DON |
|
- ▁NOW |
|
- ▁W |
|
- X |
|
- NE |
|
- GE |
|
- ▁WILL |
|
- ▁MAKE |
|
- ▁WANT |
|
- ▁OTHER |
|
- ▁SOME |
|
- LA |
|
- ▁WORLD |
|
- ▁ST |
|
- ▁COULD |
|
- TION |
|
- ▁WORK |
|
- MENT |
|
- ▁SHE |
|
- ▁NEED |
|
- ▁PA |
|
- LO |
|
- OL |
|
- ▁SAY |
|
- ▁MO |
|
- ▁BA |
|
- IST |
|
- ▁FA |
|
- IR |
|
- ▁MA |
|
- ERS |
|
- ▁HAS |
|
- VER |
|
- ▁PO |
|
- IVE |
|
- ▁PRO |
|
- ▁LIFE |
|
- ▁INTO |
|
- ▁WHICH |
|
- ▁THINGS |
|
- ▁WHERE |
|
- ND |
|
- ▁LA |
|
- MP |
|
- ▁BEEN |
|
- ▁SOMETHING |
|
- MA |
|
- ▁THOSE |
|
- US |
|
- ▁NEW |
|
- ▁CH |
|
- ▁RA |
|
- ▁ACTUALLY |
|
- ▁YEARS |
|
- ▁EVEN |
|
- ▁TAKE |
|
- ▁LOOK |
|
- UL |
|
- ▁RIGHT |
|
- ▁SAID |
|
- TIC |
|
- ▁UN |
|
- Z |
|
- AS |
|
- ▁DAY |
|
- ▁HER |
|
- IDE |
|
- ▁BO |
|
- ▁THAN |
|
- ▁HERE |
|
- ▁OVER |
|
- ▁BACK |
|
- ▁LO |
|
- ▁FIRST |
|
- ▁DI |
|
- ▁MOST |
|
- ▁COME |
|
- ▁ALSO |
|
- VI |
|
- KE |
|
- ▁WELL |
|
- IES |
|
- ABLE |
|
- UT |
|
- ▁THEN |
|
- ▁CHANGE |
|
- AGE |
|
- ▁MUCH |
|
- '0' |
|
- ▁MEAN |
|
- OM |
|
- ▁CA |
|
- CO |
|
- AT |
|
- ▁ANY |
|
- ▁HAPPEN |
|
- ▁ONLY |
|
- ▁PART |
|
- ▁SU |
|
- ▁HIS |
|
- ▁SP |
|
- ▁DIS |
|
- ANCE |
|
- ID |
|
- ▁MANY |
|
- ▁RO |
|
- '}' |
|
- ▁{ |
|
- OW |
|
- ▁O |
|
- IGHT |
|
- ▁GOOD |
|
- UM |
|
- ▁LIVE |
|
- ▁LOT |
|
- ▁D |
|
- ▁TWO |
|
- ▁LI |
|
- ▁THING |
|
- ▁GOT |
|
- ▁TELL |
|
- AC |
|
- ▁EVERY |
|
- EL |
|
- CI |
|
- ▁WHY |
|
- TA |
|
- FUL |
|
- ▁BEING |
|
- ANT |
|
- EST |
|
- ▁LEARN |
|
- ▁COMP |
|
- ▁DID |
|
- URE |
|
- PE |
|
- ▁FEEL |
|
- ▁DIFFERENT |
|
- ▁PRE |
|
- MO |
|
- TI |
|
- ▁HO |
|
- ▁K |
|
- ▁LITTLE |
|
- IV |
|
- ▁THROUGH |
|
- ▁1 |
|
- INE |
|
- ▁KIND |
|
- ME |
|
- RY |
|
- ▁LET |
|
- ▁HELP |
|
- UN |
|
- ICAL |
|
- ▁VI |
|
- ▁SAME |
|
- ECT |
|
- ▁HUMAN |
|
- ▁GIVE |
|
- HE |
|
- ▁TALK |
|
- ▁FE |
|
- ▁HA |
|
- ▁OWN |
|
- ▁AROUND |
|
- ▁USE |
|
- IS |
|
- ALLY |
|
- ▁IDEA |
|
- RESS |
|
- ▁PROBLEM |
|
- ▁PERSON |
|
- ▁TE |
|
- ▁FI |
|
- ▁FIND |
|
- ▁SA |
|
- ▁START |
|
- OS |
|
- TED |
|
- ▁BU |
|
- LG |
|
- NCE |
|
- ATED |
|
- ▁YEAR |
|
- ▁DIDN |
|
- ▁LOVE |
|
- HO |
|
- '5' |
|
- ▁DOWN |
|
- ▁SCHOOL |
|
- ▁TODAY |
|
- ▁QUESTION |
|
- ▁HEAR |
|
- DI |
|
- ▁MAN |
|
- ▁CAR |
|
- MI |
|
- ▁GREAT |
|
- ▁CR |
|
- ▁DOING |
|
- IG |
|
- ▁FACT |
|
- ▁LE |
|
- ▁LONG |
|
- OUS |
|
- ▁RU |
|
- ▁PUT |
|
- ▁AFTER |
|
- ▁EN |
|
- ▁M |
|
- ▁GA |
|
- ▁SHOW |
|
- OP |
|
- ▁SI |
|
- ▁SHOULD |
|
- ▁NE |
|
- ▁STA |
|
- ▁NEVER |
|
- ▁BIG |
|
- NS |
|
- ▁THOUGHT |
|
- ISH |
|
- ▁MIGHT |
|
- ▁ACT |
|
- ▁PLACE |
|
- LU |
|
- END |
|
- IZE |
|
- ▁REAL |
|
- ▁BETTER |
|
- ATIVE |
|
- IA |
|
- ▁UNDERSTAND |
|
- ▁POWER |
|
- ▁IMPORTANT |
|
- IAN |
|
- ▁BRAIN |
|
- ▁SYSTEM |
|
- UAL |
|
- NESS |
|
- ▁END |
|
- ▁ABLE |
|
- ▁BEFORE |
|
- ▁STORY |
|
- ▁OFF |
|
- TOR |
|
- FF |
|
- ▁STARTED |
|
- ▁DR |
|
- ▁MADE |
|
- ▁ASK |
|
- NA |
|
- ▁HU |
|
- ▁CREATE |
|
- ATING |
|
- ▁BI |
|
- ARY |
|
- ▁HIGH |
|
- ▁HIM |
|
- BO |
|
- ITION |
|
- ▁THREE |
|
- ▁PER |
|
- ▁AM |
|
- ▁CALLED |
|
- ▁APP |
|
- ▁CAME |
|
- ▁WOMEN |
|
- ▁OLD |
|
- TY |
|
- ▁PLAY |
|
- '4' |
|
- PP |
|
- ▁PH |
|
- AG |
|
- ▁BELIEVE |
|
- ▁HOME |
|
- ARD |
|
- ▁FRIEND |
|
- ▁RI |
|
- ▁FOUND |
|
- HA |
|
- ▁HAND |
|
- ▁DA |
|
- ▁STILL |
|
- ▁NA |
|
- ▁WORD |
|
- ▁TRANS |
|
- ▁HEALTH |
|
- OUND |
|
- ▁BUILD |
|
- ▁CARE |
|
- ▁WI |
|
- ▁NEXT |
|
- ▁THANK |
|
- ▁TURN |
|
- ▁TOGETHER |
|
- ▁TA |
|
- ▁BECOME |
|
- ▁EXPERIENCE |
|
- VING |
|
- ▁EM |
|
- ▁MEN |
|
- ISE |
|
- ▁MAR |
|
- ▁EACH |
|
- ▁WENT |
|
- ▁TRI |
|
- ▁POINT |
|
- ▁LAST |
|
- ▁MAYBE |
|
- ▁EVER |
|
- ▁CALL |
|
- WARD |
|
- ▁CHILDREN |
|
- ▁DOES |
|
- CA |
|
- ▁BIT |
|
- UC |
|
- LIC |
|
- UGH |
|
- ▁EXAMPLE |
|
- ▁FEW |
|
- ITIES |
|
- ▁ANOTHER |
|
- SH |
|
- ▁TH |
|
- ▁ALWAYS |
|
- ▁H |
|
- ▁READ |
|
- ▁INTEREST |
|
- FORM |
|
- ▁STATE |
|
- ▁MOVE |
|
- IOUS |
|
- ▁MIND |
|
- 'NO' |
|
- AM |
|
- ▁TEACH |
|
- ▁2 |
|
- ▁HARD |
|
- ▁WANTED |
|
- ▁20 |
|
- ▁GROW |
|
- ▁JOB |
|
- DA |
|
- ▁TOO |
|
- ▁VA |
|
- OME |
|
- ▁MAY |
|
- '8' |
|
- ▁SOCIAL |
|
- ▁HI |
|
- ▁FOOD |
|
- BI |
|
- ▁JO |
|
- ▁COURSE |
|
- ▁FR |
|
- BA |
|
- ▁MOMENT |
|
- ▁AGAIN |
|
- ▁DOESN |
|
- ▁SHARE |
|
- ▁AWAY |
|
- ▁BETWEEN |
|
- ▁LESS |
|
- ▁SHA |
|
- ▁MONEY |
|
- ▁UNDER |
|
- BER |
|
- ▁DEVELOP |
|
- ▁SECOND |
|
- ▁NUMBER |
|
- ▁ART |
|
- QUE |
|
- ▁FAMILY |
|
- '1' |
|
- '7' |
|
- ▁SH |
|
- '6' |
|
- ▁EVERYTHING |
|
- ▁FAR |
|
- ▁WORKING |
|
- ▁KIDS |
|
- ▁PLAN |
|
- ▁CHA |
|
- ▁AGO |
|
- ▁PI |
|
- ▁ENOUGH |
|
- ISM |
|
- ▁AMERICA |
|
- ▁THINKING |
|
- ▁USED |
|
- ▁REASON |
|
- ▁TRY |
|
- ▁SOMEONE |
|
- ▁GENE |
|
- ▁CU |
|
- ▁STUDENT |
|
- ▁TOLD |
|
- ▁GU |
|
- ▁TRYING |
|
- ▁LEAD |
|
- ▁MYSELF |
|
- ▁BEST |
|
- ▁FUTURE |
|
- ▁MILLION |
|
- ▁SMALL |
|
- ▁TECHNOLOGY |
|
- LESS |
|
- ▁PASS |
|
- ▁DONE |
|
- ▁YOUNG |
|
- '9' |
|
- ▁SPACE |
|
- ▁WATER |
|
- ▁MATTER |
|
- ▁OPEN |
|
- ▁COUNTRY |
|
- ▁REMEMBER |
|
- ▁TALKING |
|
- ▁REALIZE |
|
- LAND |
|
- ▁RESEARCH |
|
- Q |
|
- IAL |
|
- ▁WAR |
|
- ▁GROUP |
|
- ▁BOOK |
|
- ▁KEEP |
|
- ▁DEF |
|
- ▁STOP |
|
- ▁HOPE |
|
- ▁CONNECT |
|
- ▁SENSE |
|
- ▁ANSWER |
|
- ▁WALK |
|
- ▁DESIGN |
|
- ▁WEEK |
|
- ▁LANGUAGE |
|
- ▁DATA |
|
- ▁LOOKING |
|
- ▁PERCENT |
|
- ADE |
|
- ▁CLASS |
|
- ▁WHOLE |
|
- ▁BODY |
|
- ▁FOUR |
|
- ▁OFTEN |
|
- ▁ELSE |
|
- ▁WITHOUT |
|
- ▁PROCESS |
|
- ▁FREE |
|
- ▁MAKING |
|
- IBLE |
|
- ▁BRING |
|
- ▁CHILD |
|
- ▁GETTING |
|
- ▁PROBABLY |
|
- ▁ALLOW |
|
- ▁SPEAK |
|
- ▁COMMUNITY |
|
- ▁HAVING |
|
- ▁TOOK |
|
- ▁OP |
|
- ▁JU |
|
- ▁MU |
|
- ▁FACE |
|
- ▁INFORMATION |
|
- ABILITY |
|
- ▁NAME |
|
- ▁NI |
|
- '2' |
|
- ▁GIRL |
|
- ▁CELL |
|
- ▁ANYTHING |
|
- ▁SCIENCE |
|
- ▁STAND |
|
- ▁WHILE |
|
- ▁SUCH |
|
- '000' |
|
- ▁CASE |
|
- J |
|
- ANG |
|
- ▁FIVE |
|
- ▁GUY |
|
- ▁FUN |
|
- ▁BUSINESS |
|
- ▁ROOM |
|
- ▁SELF |
|
- ▁LIVING |
|
- ▁SURE |
|
- ▁IMAGINE |
|
- ▁ASKED |
|
- ▁MIS |
|
- ▁ENERGY |
|
- ▁PROJECT |
|
- ▁STUDY |
|
- ▁DREAM |
|
- ▁10 |
|
- ▁STORIES |
|
- ▁ALREADY |
|
- ▁TERM |
|
- ▁EFFECT |
|
- ▁KNEW |
|
- ▁SOCIETY |
|
- ▁PRODUCT |
|
- ▁PRETTY |
|
- ▁EVERYONE |
|
- ▁HEAD |
|
- ▁19 |
|
- ▁JA |
|
- ▁LIGHT |
|
- ▁LISTEN |
|
- ▁MUSIC |
|
- ▁LARGE |
|
- ▁QUITE |
|
- ▁J |
|
- ▁BOTH |
|
- ▁CHALLENGE |
|
- ▁SORT |
|
- ▁FELT |
|
- ▁TREAT |
|
- ▁EDUCATION |
|
- ▁WRONG |
|
- ▁YOURSELF |
|
- ▁MIL |
|
- ▁OURSELVES |
|
- ▁SOUND |
|
- ▁PROGRAM |
|
- ▁3 |
|
- ▁CLOSE |
|
- ▁QUA |
|
- ▁SINGLE |
|
- ▁MINUTE |
|
- ▁NOTHING |
|
- ▁ENVIRONMENT |
|
- ▁PUBLIC |
|
- ▁ORDER |
|
- ▁OB |
|
- ▁TRUE |
|
- ▁STEP |
|
- ▁WONDER |
|
- ▁NIGHT |
|
- ▁YET |
|
- ▁EYE |
|
- ▁LEFT |
|
- SHIP |
|
- ▁VALUE |
|
- ▁WHETHER |
|
- ▁MOTHER |
|
- ▁SIMPLE |
|
- ▁NU |
|
- ▁WOMAN |
|
- ▁LU |
|
- ▁CONTROL |
|
- ▁COMING |
|
- ▁SAW |
|
- ▁LEVEL |
|
- ▁TEST |
|
- ▁POSSIBLE |
|
- ▁ACROSS |
|
- ▁HOUSE |
|
- ▁WATCH |
|
- ▁GOVERNMENT |
|
- ▁PARENTS |
|
- ▁HALF |
|
- ▁TEN |
|
- ▁DEEP |
|
- ▁CANCER |
|
- ▁ISSUE |
|
- ▁LATER |
|
- ▁SOMETIMES |
|
- ▁ANIMAL |
|
- ▁SUPPORT |
|
- ▁EAT |
|
- ▁CULTURE |
|
- ▁FULL |
|
- ▁INSTEAD |
|
- ▁EARTH |
|
- ▁DISEASE |
|
- ▁MIN |
|
- ▁GAME |
|
- ▁DECIDED |
|
- ▁ALMOST |
|
- ▁SUCCESS |
|
- ▁AMAZING |
|
- ▁DRIVE |
|
- ▁DU |
|
- ▁EMOTION |
|
- ▁GLOBAL |
|
- ▁EQU |
|
- ▁PLANET |
|
- ▁CERTAIN |
|
- ▁HISTORY |
|
- ▁MEET |
|
- ▁TRAIN |
|
- ▁COMPUTER |
|
- ▁BECAME |
|
- ▁TEAM |
|
- ▁DISCOVER |
|
- ▁DIFFERENCE |
|
- WAY |
|
- ▁FOCUS |
|
- ▁PAST |
|
- ▁RESULT |
|
- ▁MONTHS |
|
- ▁MODEL |
|
- ▁YES |
|
- ▁VO |
|
- ▁COUNTRIES |
|
- ▁STUFF |
|
- ▁FIGURE |
|
- ▁30 |
|
- ▁PATIENT |
|
- ▁SPEND |
|
- ▁ENTIRE |
|
- ▁INDIVIDUAL |
|
- ▁UNTIL |
|
- ▁THOUGH |
|
- ▁DECISION |
|
- ▁CHOICE |
|
- ▁AFRICA |
|
- ▁RELATIONSHIP |
|
- ▁BREAK |
|
- ▁SOMEBODY |
|
- ▁FOLLOW |
|
- ▁CONVERSATION |
|
- ▁LEAVE |
|
- ▁THOUSAND |
|
- ▁SIGN |
|
- ▁SINCE |
|
- ▁DIFFICULT |
|
- ▁IMPACT |
|
- ▁HOURS |
|
- ▁COUPLE |
|
- ▁CAUSE |
|
- ▁PARTICULAR |
|
- ▁DOCTOR |
|
- ▁TAKING |
|
- ▁COMPANY |
|
- ▁EVERYBODY |
|
- ▁50 |
|
- ▁DIRECT |
|
- ▁EXPECT |
|
- ▁200 |
|
- ▁ORGAN |
|
- ▁EXACTLY |
|
- ▁THEMSELVES |
|
- ▁HAPPY |
|
- ▁MUST |
|
- ▁SAFE |
|
- ▁BASED |
|
- ▁BEAUTIFUL |
|
- ▁PHONE |
|
- ▁AGAINST |
|
- ▁WRITE |
|
- ▁DRUG |
|
- ▁PICTURE |
|
- ▁MEDIA |
|
- ▁WAIT |
|
- ▁FRONT |
|
- ▁RISK |
|
- ▁BEHAVIOR |
|
- ▁BLACK |
|
- ▁100 |
|
- ▁NATURE |
|
- ▁ORGANIZATION |
|
- ▁HUNDRED |
|
- ▁EASY |
|
- ▁ACCESS |
|
- ▁HOLD |
|
- ▁COMMON |
|
- ▁MARKET |
|
- ▁GRAND |
|
- ▁VOICE |
|
- ▁DEATH |
|
- ▁PIECE |
|
- ▁BILLION |
|
- ▁LEAST |
|
- ▁DURING |
|
- '3' |
|
- ▁NATURAL |
|
- ▁TYPE |
|
- ▁INVEST |
|
- ▁GENERATION |
|
- ENCY |
|
- ▁STRONG |
|
- OLOGICAL |
|
- ▁CLEAR |
|
- ▁PRESENT |
|
- ▁INTERNET |
|
- ▁KILL |
|
- OLOGY |
|
- ▁SUPER |
|
- ▁UNITED |
|
- ▁IMAGE |
|
- ▁RATHER |
|
- ▁SOLUTION |
|
- ▁ECONOMIC |
|
- ▁PROTECT |
|
- ▁BEHIND |
|
- ▁COLLECT |
|
- ▁SCIENTIST |
|
- UDE |
|
- ▁PRODUCE |
|
- ▁PERFECT |
|
- ▁DOLLARS |
|
- ▁VIEW |
|
- ▁CONSIDER |
|
- ▁THIRD |
|
- ▁MACHINE |
|
- ▁OUTSIDE |
|
- ▁SKILL |
|
- ▁EXPERIMENT |
|
- ▁COLLEGE |
|
- ▁QUI |
|
- ▁OPPORTUNITY |
|
- ▁LOCAL |
|
- ▁SIMPLY |
|
- ▁EARLY |
|
- ▁MAJOR |
|
- ▁CANNOT |
|
- ▁PHYSICAL |
|
- ▁WHATEVER |
|
- ▁MIDDLE |
|
- ▁VIDEO |
|
- ▁ALONG |
|
- OGRAPH |
|
- ▁SOLVE |
|
- ▁KEY |
|
- ▁TRUST |
|
- ▁FIELD |
|
- HOOD |
|
- ▁ATTENTION |
|
- ▁MICRO |
|
- ▁SHORT |
|
- ▁SITUATION |
|
- ▁STREET |
|
- ▁COMPANIES |
|
- ▁POLITICAL |
|
- ▁NORMAL |
|
- ▁AMOUNT |
|
- ▁SERVICE |
|
- ▁OBJECT |
|
- ▁POTENTIAL |
|
- ▁COLOR |
|
- ▁KNOWLEDGE |
|
- ▁MORNING |
|
- ▁TRUTH |
|
- ▁UNIVERSITY |
|
- ▁PROVIDE |
|
- ▁RESOURCE |
|
- ▁POSITIVE |
|
- ▁EUROPE |
|
- ▁SPECIAL |
|
- ▁CONTINUE |
|
- ▁BASICALLY |
|
- ▁SMART |
|
- ▁PRACTICE |
|
- ▁POPULATION |
|
- ▁TRAVEL |
|
- ▁AFFECT |
|
- ▁FINALLY |
|
- ▁APPROACH |
|
- ▁COUNT |
|
- ▁PERHAPS |
|
- ▁INTERACT |
|
- ▁EXPLAIN |
|
- ▁ENGINEER |
|
- ▁ENGAGE |
|
- ▁SITTING |
|
- ▁OFFICE |
|
- ▁COMPLEX |
|
- ▁WHITE |
|
- ▁GENDER |
|
- ▁MESSAGE |
|
- ▁WORTH |
|
- ▁ITSELF |
|
- IZATION |
|
- ▁BUILT |
|
- ▁IMPROVE |
|
- ▁OKAY |
|
- ▁PRISON |
|
- ▁MATERIAL |
|
- ▁NETWORK |
|
- ▁EITHER |
|
- ▁GIVING |
|
- ▁LIMIT |
|
- ▁MEASURE |
|
- ▁DARK |
|
- ▁AUDIENCE |
|
- ▁ACCEPT |
|
- ▁RECORD |
|
- ▁OCEAN |
|
- ▁CHOOSE |
|
- ▁SPECIES |
|
- ▁YORK |
|
- ▁SUSTAIN |
|
- ▁SLEEP |
|
- ▁OBVIOUS |
|
- ▁HOSPITAL |
|
- ▁PERSPECTIVE |
|
- ▁INCREASE |
|
- ▁OPERA |
|
- ▁TAUGHT |
|
- ▁MULTI |
|
- ▁CHANGING |
|
- ▁JOURNEY |
|
- ▁INDUSTRY |
|
- ▁NEURO |
|
- ▁REQUIRE |
|
- ▁DECADE |
|
- ▁CURRENT |
|
- ▁PUSH |
|
- ▁BENEFIT |
|
- ▁YEAH |
|
- ▁BLOOD |
|
- ▁SCALE |
|
- ▁ESPECIALLY |
|
- ▁COMMUNITIES |
|
- ▁ADULT |
|
- ▁CHARACTER |
|
- ▁REPRESENT |
|
- IFIED |
|
- ▁SUFFER |
|
- ▁RECOGNIZE |
|
- ▁CENTURY |
|
- ▁SUDDEN |
|
- ▁FUNCTION |
|
- ▁ACHIEVE |
|
- ▁SIMILAR |
|
- ▁BROUGHT |
|
- ▁TRADITION |
|
- ▁UNIVERSE |
|
- ▁CLIMATE |
|
- ▁BREATH |
|
- ▁EXTREME |
|
- ▁REPORT |
|
- ▁DAUGHTER |
|
- ▁COMFORT |
|
- ▁CONCEPT |
|
- ▁ECONOMY |
|
- ▁INNOVATION |
|
- ▁QUICKLY |
|
- ▁SUGGEST |
|
- ▁SPECIFIC |
|
- ▁CRAZY |
|
- ▁CONSCIOUS |
|
- ▁SPREAD |
|
- ▁TRULY |
|
- '{' |
|
- <sos/eos> |
|
init: xavier_uniform |
|
input_size: 2048 |
|
ctc_conf: |
|
dropout_rate: 0.0 |
|
ctc_type: builtin |
|
reduce: true |
|
ignore_nan_grad: null |
|
zero_infinity: true |
|
joint_net_conf: null |
|
use_preprocessor: true |
|
token_type: bpe |
|
bpemodel: data/en_token_list/bpe_unigram1000/bpe.model |
|
non_linguistic_symbols: null |
|
cleaner: null |
|
g2p: null |
|
speech_volume_normalize: null |
|
rir_scp: null |
|
rir_apply_prob: 1.0 |
|
noise_scp: null |
|
noise_apply_prob: 1.0 |
|
noise_db_range: '13_15' |
|
short_noise_thres: 0.5 |
|
aux_ctc_tasks: [] |
|
frontend: null |
|
frontend_conf: {} |
|
specaug: null |
|
specaug_conf: {} |
|
normalize: global_mvn |
|
normalize_conf: |
|
stats_file: exp/asr_stats_extracted_en_bpe1000/train/feats_stats.npz |
|
model: espnet |
|
model_conf: |
|
ctc_weight: 0.3 |
|
lsm_weight: 0.1 |
|
length_normalized_loss: false |
|
preencoder: null |
|
preencoder_conf: {} |
|
encoder: avhubert |
|
encoder_conf: |
|
avhubert_url: https://dl.fbaipublicfiles.com/avhubert/model/lrs3_vox/noise-pretrain/large_vox_iter5.pt |
|
avhubert_dir_path: ./local/pre-trained |
|
encoder_embed_dim: 1024 |
|
encoder_attention_heads: 16 |
|
encoder_ffn_embed_dim: 4096 |
|
encoder_layers: 24 |
|
dropout: 0.1 |
|
dropout_features: 0.1 |
|
encoder_layerdrop: 0.05 |
|
attention_dropout: 0.1 |
|
extracted: true |
|
freeze_finetune_updates: 10000 |
|
feature_grad_mult: 1.0 |
|
postencoder: null |
|
postencoder_conf: {} |
|
decoder: transformer |
|
decoder_conf: |
|
attention_heads: 4 |
|
linear_units: 4096 |
|
num_blocks: 6 |
|
dropout_rate: 0.1 |
|
positional_dropout_rate: 0.1 |
|
self_attention_dropout_rate: 0.1 |
|
src_attention_dropout_rate: 0.1 |
|
preprocessor: default |
|
preprocessor_conf: {} |
|
required: |
|
- output_dir |
|
- token_list |
|
version: '202308' |
|
distributed: true |
|
``` |
|
|
|
</details> |
|
|
|
|
|
|
|
### Citing ESPnet |
|
|
|
```bibtex |
|
@inproceedings{watanabe2018espnet, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
title={{ESPnet}: End-to-End Speech Processing Toolkit}, |
|
year={2018}, |
|
booktitle={Proceedings of Interspeech}, |
|
pages={2207--2211}, |
|
doi={10.21437/Interspeech.2018-1456}, |
|
url={http://dx.doi.org/10.21437/Interspeech.2018-1456} |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
``` |
|
|
|
or arXiv: |
|
|
|
```bibtex |
|
@misc{watanabe2018espnet, |
|
title={ESPnet: End-to-End Speech Processing Toolkit}, |
|
author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai}, |
|
year={2018}, |
|
eprint={1804.00015}, |
|
archivePrefix={arXiv}, |
|
primaryClass={cs.CL} |
|
} |
|
``` |
|
|