# ############################################################################
# Tokenizer training hyperparameters (SpeechBrain / HyperPyYAML).
# Trains a character-level SentencePiece model on the MATBN_SEG corpus,
# reading transcriptions from the JSON manifests produced by the prepare step.
# ############################################################################

# Folders
dataset_folder: /home/wayne/CORPUS/MATBN_SEG
prepare_folder: results/prepare_seg
output_folder: results/tokenizer_seg_bpe5k_char

# NOTE(review): presumably consumed by the training recipe to decide whether
# <unk> tokens are kept in the target text — confirm against the caller.
keep_unk: false

# Tokenizer settings (referenced below via !ref so each value is defined once)
token_type: char
token_output: 5000            # vocabulary size handed to SentencePiece
character_coverage: 1.0       # 1.0 = cover every character seen in training
annotation_read: transcription  # JSON field holding the text to tokenize

# Data manifests produced by the preparation step
train_json: !ref <prepare_folder>/train.json
dev_json: !ref <prepare_folder>/dev.json
eval_json: !ref <prepare_folder>/eval.json
test_json: !ref <prepare_folder>/test.json

# SentencePiece trainer; all duplicated literals replaced by !ref references
# to the single definitions above so the two can never drift apart.
tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
    model_dir: !ref <output_folder>
    vocab_size: !ref <token_output>
    annotation_train: !ref <train_json>
    annotation_read: !ref <annotation_read>
    model_type: !ref <token_type>
    character_coverage: !ref <character_coverage>
    annotation_list_to_check:
        - !ref <dev_json>
        - !ref <eval_json>
        - !ref <test_json>
    annotation_format: json
    bos_id: 1
    eos_id: 2