File size: 2,899 Bytes
1b6a0e4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# This is an example that demonstrates how to configure a model file.
# You can modify the configuration according to your own requirements.
# to print the register_table:
# from funasr.register import tables
# tables.print()
# network architecture
#model: funasr.models.paraformer.model:Paraformer
model: BiCifParaformer
model_conf:
ctc_weight: 0.0
lsm_weight: 0.1
length_normalized_loss: true
predictor_weight: 1.0
predictor_bias: 1
sampling_ratio: 0.75
# encoder
encoder: SANMEncoder
encoder_conf:
output_size: 512
attention_heads: 4
linear_units: 2048
num_blocks: 50
dropout_rate: 0.1
positional_dropout_rate: 0.1
attention_dropout_rate: 0.1
input_layer: pe
pos_enc_class: SinusoidalPositionEncoder
normalize_before: true
kernel_size: 11
sanm_shfit: 0
selfattention_layer_type: sanm
# decoder
decoder: ParaformerSANMDecoder
decoder_conf:
attention_heads: 4
linear_units: 2048
num_blocks: 16
dropout_rate: 0.1
positional_dropout_rate: 0.1
self_attention_dropout_rate: 0.1
src_attention_dropout_rate: 0.1
att_layer_num: 16
kernel_size: 11
sanm_shfit: 0
predictor: CifPredictorV3
predictor_conf:
idim: 512
threshold: 1.0
l_order: 1
r_order: 1
tail_threshold: 0.45
smooth_factor2: 0.25
noise_threshold2: 0.01
upsample_times: 3
use_cif1_cnn: false
upsample_type: cnn_blstm
# frontend related
frontend: WavFrontend
frontend_conf:
fs: 16000
window: hamming
n_mels: 80
frame_length: 25
frame_shift: 10
lfr_m: 7
lfr_n: 6
specaug: SpecAugLFR
specaug_conf:
apply_time_warp: false
time_warp_window: 5
time_warp_mode: bicubic
apply_freq_mask: true
freq_mask_width_range:
- 0
- 30
lfr_rate: 6
num_freq_mask: 1
apply_time_mask: true
time_mask_width_range:
- 0
- 12
num_time_mask: 1
train_conf:
accum_grad: 1
grad_clip: 5
max_epoch: 150
val_scheduler_criterion:
- valid
- acc
best_model_criterion:
- - valid
- acc
- max
keep_nbest_models: 10
log_interval: 50
optim: adam
optim_conf:
lr: 0.0005
scheduler: warmuplr
scheduler_conf:
warmup_steps: 30000
dataset: AudioDataset
dataset_conf:
index_ds: IndexDSJsonl
batch_sampler: DynamicBatchLocalShuffleSampler
batch_type: example # example or length
batch_size: 1 # if batch_type is example, batch_size is the numbers of samples; if length, batch_size is source_token_len+target_token_len;
max_token_length: 2048 # filter samples if source_token_len+target_token_len > max_token_length,
buffer_size: 500
shuffle: True
num_workers: 0
tokenizer: CharTokenizer
tokenizer_conf:
unk_symbol: <unk>
split_with_space: true
ctc_conf:
dropout_rate: 0.0
ctc_type: builtin
reduce: true
ignore_nan_grad: true
normalize: null
|