# (web file-viewer page residue, kept as comments; not part of the configuration)
# Axelisme's picture
# Upload 43 files
# 297c44b
# raw
# history blame
# 1.09 kB
# Generated 2021-10-04 from:
# /mnt/md0/user_wayne/speechbrain/recipes/MATBN/Tokenizer/hparams/tokenizer_seg_bpe5k_char.yaml
# yamllint disable
# --- Folders ---
dataset_folder: /home/wayne/CORPUS/MATBN_SEG
prepare_folder: results/prepare_seg
output_folder: results/tokenizer_seg_bpe5k_char

# --- Tokenizer options ---
# token_type, token_output, character_coverage and annotation_read carry the
# same values as model_type, vocab_size, character_coverage and
# annotation_read in the `tokenizer` entry further down.
keep_unk: false
token_type: char  # ["unigram", "bpe", "char"]
token_output: 5000  # index(blank/eos/bos/unk) = 0
character_coverage: 1.0
annotation_read: transcription

# --- Annotation files (all located under prepare_folder) ---
train_json: results/prepare_seg/train.json
dev_json: results/prepare_seg/dev.json
eval_json: results/prepare_seg/eval.json
test_json: results/prepare_seg/test.json
# Tokenizer factory: the `!name:` tag refers to the SentencePiece class, and
# the mapping nested beneath it supplies that callable's keyword arguments.
# The arguments must be indented under `tokenizer`; left at column 0 they
# become unrelated top-level keys, `annotation_read` and `character_coverage`
# duplicate the top-level settings of the same name, and the tag is left with
# an empty value.
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
  model_dir: results/tokenizer_seg_bpe5k_char
  vocab_size: 5000
  annotation_train: results/prepare_seg/train.json
  annotation_read: transcription
  model_type: char  # ["unigram", "bpe", "char"]
  character_coverage: 1.0
  # Block-style list of the non-training annotation files.
  annotation_list_to_check:
    - results/prepare_seg/dev.json
    - results/prepare_seg/eval.json
    - results/prepare_seg/test.json
  annotation_format: json
  bos_id: 1
  eos_id: 2