# ESPnet2-SLU / slurp / config.yaml
# ssiidd's picture
# Add slurp model files
# 65191fa
# Run setup: config file path, log level, output directory, GPU count,
# random seed and worker count.
config: conf/tuning/train_asr_conformer.yaml
print_config: false
log_level: INFO
dry_run: false
iterator_type: sequence
output_dir: exp/asr_train_asr_conformer_raw_en_word
ngpu: 1
seed: 0
num_workers: 1
num_att_plot: 3
# Distributed-training fields. World size, rank, master address/port and
# launcher are all unset (null) and multiprocessing_distributed is off.
dist_backend: nccl
dist_init_method: env://
dist_world_size: null
dist_rank: null
local_rank: 0
dist_master_addr: null
dist_master_port: null
dist_launcher: null
multiprocessing_distributed: false
unused_parameters: false
sharded_ddp: false
# cuDNN switches (enabled, deterministic on, benchmark off), the
# stats-collection flags, and the epoch budget.
cudnn_enabled: true
cudnn_benchmark: false
cudnn_deterministic: true
collect_stats: false
write_collected_feats: false
max_epoch: 50
# NOTE(review): patience is null -- presumably no early stopping; confirm.
patience: null
# Metric selectors. The first two are single flat lists:
# (phase, metric) and (phase, metric, mode).
val_scheduler_criterion:
- valid
- loss
early_stopping_criterion:
- valid
- loss
- min
# best_model_criterion is a list of (phase, metric, mode) triples, so the
# items after "- - valid" must be indented to stay inside the same triple.
# Left at column 0 they become separate outer entries.
best_model_criterion:
- - valid
  - acc
  - max
# Checkpoint retention, gradient handling, resume/precision flags and
# logging back-ends.
keep_nbest_models: 10
grad_clip: 5.0
grad_clip_type: 2.0
grad_noise: false
accum_grad: 1
no_forward_run: false
resume: true
train_dtype: float32
use_amp: false
log_interval: null
use_tensorboard: true
# Weights & Biases logging is off; its project/id/entity/name are unset.
use_wandb: false
wandb_project: null
wandb_id: null
wandb_entity: null
wandb_name: null
wandb_model_log_interval: -1
detect_anomaly: false
# pretrain_path is null; init_param and freeze_param are empty lists.
pretrain_path: null
init_param: []
ignore_init_mismatch: false
freeze_param: []
num_iters_per_epoch: null
# Batching and data-iterator settings. The valid_* overrides are unset.
batch_size: 20
valid_batch_size: null
batch_bins: 1000000
valid_batch_bins: null
# Shape files: one for speech and one for text, per split.
train_shape_file:
- exp/asr_stats_raw_en_word/train/speech_shape
- exp/asr_stats_raw_en_word/train/text_shape.word
valid_shape_file:
- exp/asr_stats_raw_en_word/valid/speech_shape
- exp/asr_stats_raw_en_word/valid/text_shape.word
batch_type: folded
valid_batch_type: null
# NOTE(review): the two fold_length values presumably line up with the two
# shape files above (speech, then text) -- confirm.
fold_length:
- 80000
- 150
sort_in_batch: descending
sort_batch: descending
multiple_iterator: false
chunk_length: 500
chunk_shift_ratio: 0.5
num_cache_chunks: 1024
# Input data streams. Each entry is a (path, name, type) triple, as the key
# name spells out; there is one triple for audio and one for the transcript.
# The second and third items of a triple must be indented under the "- -"
# that opens it, otherwise they parse as separate outer-list entries.
train_data_path_and_name_and_type:
- - dump/raw/train/wav.scp
  - speech
  - sound
- - dump/raw/train/text
  - text
  - text
valid_data_path_and_name_and_type:
- - dump/raw/devel/wav.scp
  - speech
  - sound
- - dump/raw/devel/text
  - text
  - text
# Data-key strictness and cache limits; the validation cache size is unset.
allow_variable_data_keys: false
max_cache_size: 0.0
max_cache_fd: 32
valid_max_cache_size: null
# Optimizer and learning-rate scheduler. Each *_conf mapping carries the
# options for the component selected on the line above it, so lr and
# warmup_steps must be nested. At column 0 they become unrelated top-level
# keys and leave optim_conf / scheduler_conf null.
optim: adam
optim_conf:
  lr: 0.0002
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 25000
# Output vocabulary: subword pieces (word-initial ones carry the "▁" prefix)
# mixed with intent labels such as email_query, opened by <blank>/<unk> and
# closed by <sos/eos>.
# NOTE(review): order is presumably significant (list position = token id),
# so entries must not be reordered, inserted or removed -- confirm.
# The single-letter tokens 'y' and 'n' are quoted, like 'on', because
# YAML 1.1 loaders may resolve the bare forms to booleans.
token_list:
- <blank>
- <unk>
- ▁the
- s
- ▁to
- ▁i
- ▁me
- ▁you
- ▁what
- ▁a
- ▁is
- ▁my
- ▁please
- a
- ''''
- 'y'
- ▁in
- ing
- ▁s
- e
- ▁for
- i
- ▁on
- d
- t
- o
- u
- er
- p
- ▁of
- es
- re
- l
- ▁it
- ▁p
- le
- ▁f
- ▁m
- ▁email
- ▁d
- m
- ▁c
- st
- r
- 'n'
- ar
- ▁h
- b
- ▁that
- c
- ▁this
- h
- an
- email_query
- ▁play
- ▁re
- ▁b
- ▁do
- ▁can
- at
- ▁have
- g
- ▁from
- ▁and
- en
- email_sendemail
- ▁olly
- 'on'
- ▁new
- it
- qa_factoid
- calendar_set
- ▁any
- or
- ▁g
- ▁how
- ▁t
- ▁tell
- ch
- ▁not
- ▁about
- ▁at
- ate
- general_negate
- f
- ▁today
- ▁e
- ed
- ▁list
- ▁r
- in
- k
- ic
- social_post
- ▁are
- play_music
- general_quirky
- ▁l
- al
- v
- ent
- ▁n
- ▁be
- ▁an
- ▁st
- et
- ▁am
- general_praise
- ▁time
- weather_query
- ▁up
- ▁check
- calendar_query
- ▁w
- om
- ur
- ▁send
- ▁with
- ly
- w
- general_explain
- ad
- ▁th
- news_query
- ▁one
- ▁emails
- day
- ▁sh
- ce
- ▁last
- ve
- ▁he
- z
- ▁ch
- ▁will
- ▁set
- ▁would
- ▁was
- x
- general_repeat
- ▁add
- ou
- ▁again
- ▁ex
- is
- ct
- general_affirm
- general_confirm
- ▁song
- ▁next
- ▁j
- ▁meeting
- um
- ation
- ▁turn
- ▁did
- if
- ▁alarm
- am
- ▁like
- datetime_query
- ter
- ▁remind
- ▁o
- qa_definition
- ▁said
- ▁calendar
- ll
- se
- ers
- th
- ▁get
- our
- ▁need
- ▁all
- ot
- ▁want
- ▁off
- and
- ▁right
- ▁de
- ▁tr
- ut
- general_dontcare
# NOTE(review): the next entry is empty and loads as null. It may be a
# whitespace-only token whose text was lost -- confirm against the original
# vocabulary before training or decoding with this file.
-
- ▁week
- as
- ▁tweet
- ight
- ir
- ▁your
- ▁event
- ▁news
- ▁se
- ay
- ion
- ▁com
- ▁there
- ▁ye
- ▁weather
- un
- ▁confirm
- ld
- calendar_remove
- ▁y
- ▁lights
- ▁more
- ▁v
- play_radio
- ▁does
- ▁po
- ▁now
- id
- email_querycontact
- ▁show
- ▁could
- ery
- op
- ▁day
- ▁pm
- ▁music
- ▁tomorrow
- ▁train
- ▁u
- ine
- ▁or
- ange
- qa_currency
- ice
- ▁contact
- ▁just
- ▁jo
- ▁think
- qa_stock
- end
- ss
- ber
- ▁tw
- ▁command
- ▁make
- ▁no
- ▁mo
- pe
- ▁find
- general_commandstop
- ▁when
- social_query
- ▁so
- ong
- ▁co
- ant
- ow
- ▁much
- ▁where
- ul
- ue
- ri
- ap
- ▁start
- ▁mar
- ▁by
- one
- ▁know
- ▁wor
- oo
- ▁give
- ▁let
- ▁events
- der
- ▁ro
- ▁pr
- ▁pl
- play_podcasts
- art
- us
- ▁work
- ▁current
- ol
- cooking_recipe
- nt
- ▁correct
- transport_query
- ia
- ▁stock
- ▁br
- ive
- ▁app
- ▁two
- ▁latest
- lists_query
- ▁some
- recommendation_events
- ab
- ▁go
- ▁but
- ook
- ke
- alarm_set
- play_audiobook
- ▁k
- ▁response
- ▁wr
- cast
- ▁open
- ▁cle
- ▁done
- ▁got
- ▁ca
- ite
- ase
- ▁thank
- iv
- ah
- ag
- ▁answer
- ie
- ▁five
- ▁book
- ist
- ▁rec
- ore
- ▁john
- ment
- ▁appreci
- ▁fri
- ack
- ▁remove
- ated
- ock
- ree
- j
- ▁good
- ▁many
- orn
- fe
- ▁radio
- ▁we
- int
- ▁facebook
- ▁cl
- ▁sev
- ▁schedule
- ard
- ▁per
- ▁li
- ▁going
- nd
- ain
- recommendation_locations
- ▁post
- lists_createoradd
- ff
- ▁su
- red
- iot_hue_lightoff
- lists_remove
- ▁ar
- een
- ▁say
- ro
- ▁volume
- ▁le
- ▁reply
- ▁complaint
- ▁out
- ▁delete
- ▁ne
- ame
- ▁detail
- ▁if
- im
- ▁happ
- orr
- ich
- em
- ▁ev
- ction
- ▁dollar
- ▁as
- alarm_query
- audio_volume_mute
- ac
- music_query
- ▁mon
- ther
- ▁thanks
- cel
- ▁who
- ave
- ▁service
- ▁mail
- ty
- ▁hear
- de
- ▁si
- ▁wh
- ood
- ell
- ▁con
- ▁once
- ound
- ▁don
- ▁loc
- ▁light
- ▁birthday
- ▁inf
- ort
- ffe
- ▁playlist
- el
- ening
- ▁us
- ▁un
- ▁has
- own
- ▁inc
- ai
- ▁speak
- age
- ▁mess
- ast
- ci
- ver
- ▁ten
- ▁underst
- ▁pro
- ▁q
- enty
- ▁ticket
- gh
- audio_volume_up
- ▁take
- ▁bo
- ally
- ome
- transport_ticket
- ind
- iot_hue_lightchange
- pp
- iot_coffee
- ▁res
- plain
- io
- lar
- takeaway_query
- ge
- takeaway_order
- email_addcontact
- play_game
- ak
- ▁fa
- transport_traffic
- music_likeness
- ▁rep
- act
- ust
- transport_taxi
- iot_hue_lightdim
- ▁mu
- ▁ti
- ick
- ▁ha
- ould
- general_joke
- '1'
- qa_maths
- ▁lo
- iot_cleaning
- q
- ake
- ill
- her
- iot_hue_lightup
- pl
- '2'
- alarm_remove
- orrect
- ▁cont
- mail
- out
- audio_volume_down
- book
- ail
- recommendation_movies
- ck
- ▁man
- ▁mus
- ▁che
- me
- ume
- ▁answ
- datetime_convert
- ▁late
- iot_wemo_on
- ▁twe
- music_settings
- iot_wemo_off
- orre
- ith
- ▁tom
- ▁fr
- ere
- ▁ad
- xt
- ▁ab
- ank
- general_greet
- now
- ▁meet
- ▁curre
- ▁respon
- ▁ag
- ght
- audio_volume_other
- ink
- ▁spe
- iot_hue_lighton
- ▁rem
- lly
- '?'
- urn
- ▁op
- ▁complain
- ▁comm
- let
- music_dislikeness
- ove
- ▁sch
- ather
- ▁rad
- edule
- ▁under
- icket
- lease
- ▁bir
- erv
- ▁birth
- ▁face
- ▁cur
- sw
- ▁serv
- ek
- aid
- '9'
- ▁vol
- edu
- '5'
- cooking_query
- lete
- ▁joh
- ▁det
- firm
- nder
- '0'
- irm
- '8'
- '&'
- _
- list
- pon
- qa_query
- '7'
- '3'
- '-'
- reci
- ▁doll
- <sos/eos>
# Both left unset (null).
init: null
input_size: null
# CTC options. The entries must be nested under ctc_conf. At column 0 they
# become top-level keys, ctc_conf itself loads as null, and dropout_rate
# clashes with other dropout_rate keys in this file.
ctc_conf:
  dropout_rate: 0.0
  ctc_type: builtin
  reduce: true
  ignore_nan_grad: true
# Model-level loss settings (the ctc_weight and lsm_weight values and the
# length-normalisation flag), nested so that model_conf is a mapping rather
# than null.
model_conf:
  ctc_weight: 0.3
  lsm_weight: 0.1
  length_normalized_loss: false
# Text/audio preprocessing. token_type is "word"; the BPE model, symbol
# list, cleaner and g2p are unset.
extract_feats_in_collect_stats: false
use_preprocessor: true
token_type: word
bpemodel: null
non_linguistic_symbols: null
cleaner: null
g2p: null
speech_volume_normalize: null
# No RIR or noise lists are given (rir_scp / noise_scp are null).
rir_scp: null
rir_apply_prob: 1.0
noise_scp: null
noise_apply_prob: 1.0
# Quoted so it stays the string "13_15" rather than being read as a number.
noise_db_range: '13_15'
# Feature front end and its options. fs must sit inside frontend_conf;
# at column 0 it is a stray top-level key and frontend_conf loads as null.
frontend: default
frontend_conf:
  fs: 16k
# SpecAugment settings: time warp, two frequency masks and two time masks.
# All options, including the two *_width_range lists, are nested under
# specaug_conf so that it loads as a mapping rather than null.
specaug: specaug
specaug_conf:
  apply_time_warp: true
  time_warp_window: 5
  time_warp_mode: bicubic
  apply_freq_mask: true
  # Two-element range for each frequency mask's width.
  freq_mask_width_range:
  - 0
  - 30
  num_freq_mask: 2
  apply_time_mask: true
  # Two-element range for each time mask's width.
  time_mask_width_range:
  - 0
  - 40
  num_time_mask: 2
# Normalisation uses utterance_mvn with no extra options; no pre-encoder
# is configured.
normalize: utterance_mvn
normalize_conf: {}
preencoder: null
preencoder_conf: {}
# Encoder selection and its hyper-parameters. These options belong inside
# encoder_conf. Left at top level, keys such as dropout_rate,
# attention_heads, linear_units and num_blocks duplicate same-named keys
# elsewhere in the file and encoder_conf loads as null.
encoder: conformer
encoder_conf:
  output_size: 512
  attention_heads: 8
  linear_units: 2048
  num_blocks: 12
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  attention_dropout_rate: 0.1
  input_layer: conv2d
  normalize_before: true
  macaron_style: true
  pos_enc_layer_type: rel_pos
  selfattention_layer_type: rel_selfattn
  activation_type: swish
  use_cnn_module: true
  cnn_module_kernel: 31
# No post-encoder is configured.
postencoder: null
postencoder_conf: {}
# Decoder selection and its hyper-parameters, nested under decoder_conf so
# that attention_heads, linear_units, num_blocks and the dropout rates do
# not duplicate same-named keys elsewhere in the file.
decoder: transformer
decoder_conf:
  attention_heads: 8
  linear_units: 2048
  num_blocks: 6
  dropout_rate: 0.1
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.1
  src_attention_dropout_rate: 0.1
# Keys listed as required, followed by a version string and the
# distributed flag (off).
required:
- output_dir
- token_list
# NOTE(review): presumably the toolkit version that wrote this file -- confirm.
version: 0.10.3a2
distributed: false