bark-small / generation_config.json

Upload config

b192d44 over 1 year ago

4.74 kB

	{
	"coarse_acoustics_config": {
	"_from_model_config": false,
	"bad_words_ids": null,
	"begin_suppress_tokens": null,
	"bos_token_id": null,
	"coarse_infer_token": 12050,
	"coarse_rate_hz": 75,
	"coarse_semantic_pad_token": 12048,
	"constraints": null,
	"decoder_start_token_id": null,
	"diversity_penalty": 0.0,
	"do_sample": true,
	"early_stopping": false,
	"encoder_no_repeat_ngram_size": 0,
	"encoder_repetition_penalty": 1.0,
	"eos_token_id": null,
	"epsilon_cutoff": 0.0,
	"eta_cutoff": 0.0,
	"exponential_decay_length_penalty": null,
	"force_words_ids": null,
	"forced_bos_token_id": null,
	"forced_decoder_ids": null,
	"forced_eos_token_id": null,
	"generation_kwargs": {},
	"length_penalty": 1.0,
	"max_coarse_history": 630,
	"max_coarse_input_length": 256,
	"max_length": 20,
	"max_new_tokens": null,
	"max_time": null,
	"min_length": 0,
	"min_new_tokens": null,
	"n_coarse_codebooks": 2,
	"no_repeat_ngram_size": 0,
	"num_beam_groups": 1,
	"num_beams": 1,
	"num_return_sequences": 1,
	"output_attentions": false,
	"output_hidden_states": false,
	"output_scores": false,
	"pad_token_id": null,
	"penalty_alpha": null,
	"remove_invalid_values": false,
	"renormalize_logits": true,
	"repetition_penalty": 1.0,
	"return_dict_in_generate": false,
	"sliding_window_len": 60,
	"suppress_tokens": null,
	"temperature": 0.7,
	"top_k": 50,
	"top_p": 1.0,
	"transformers_version": "4.31.0.dev0",
	"typical_p": 1.0,
	"use_cache": true
	},
	"codebook_size": 1024,
	"fine_acoustics_config": {
	"_from_model_config": false,
	"bad_words_ids": null,
	"begin_suppress_tokens": null,
	"bos_token_id": null,
	"constraints": null,
	"decoder_start_token_id": null,
	"diversity_penalty": 0.0,
	"do_sample": false,
	"early_stopping": false,
	"encoder_no_repeat_ngram_size": 0,
	"encoder_repetition_penalty": 1.0,
	"eos_token_id": null,
	"epsilon_cutoff": 0.0,
	"eta_cutoff": 0.0,
	"exponential_decay_length_penalty": null,
	"force_words_ids": null,
	"forced_bos_token_id": null,
	"forced_decoder_ids": null,
	"forced_eos_token_id": null,
	"generation_kwargs": {},
	"length_penalty": 1.0,
	"max_fine_history_length": 512,
	"max_fine_input_length": 1024,
	"max_length": 20,
	"max_new_tokens": null,
	"max_time": null,
	"min_length": 0,
	"min_new_tokens": null,
	"n_fine_codebooks": 8,
	"no_repeat_ngram_size": 0,
	"num_beam_groups": 1,
	"num_beams": 1,
	"num_return_sequences": 1,
	"output_attentions": false,
	"output_hidden_states": false,
	"output_scores": false,
	"pad_token_id": null,
	"penalty_alpha": null,
	"remove_invalid_values": false,
	"renormalize_logits": false,
	"repetition_penalty": 1.0,
	"return_dict_in_generate": false,
	"suppress_tokens": null,
	"temperature": 0.5,
	"top_k": 50,
	"top_p": 1.0,
	"transformers_version": "4.31.0.dev0",
	"typical_p": 1.0,
	"use_cache": true
	},
	"model_type": "bark",
	"sample_rate": 24000,
	"semantic_config": {
	"_from_model_config": false,
	"bad_words_ids": null,
	"begin_suppress_tokens": null,
	"bos_token_id": null,
	"constraints": null,
	"decoder_start_token_id": null,
	"diversity_penalty": 0.0,
	"do_sample": true,
	"early_stopping": false,
	"encoder_no_repeat_ngram_size": 0,
	"encoder_repetition_penalty": 1.0,
	"eos_token_id": 10000,
	"epsilon_cutoff": 0.0,
	"eta_cutoff": 0.0,
	"exponential_decay_length_penalty": null,
	"force_words_ids": null,
	"forced_bos_token_id": null,
	"forced_decoder_ids": null,
	"forced_eos_token_id": null,
	"generation_kwargs": {},
	"length_penalty": 1.0,
	"max_input_semantic_length": 256,
	"max_length": 20,
	"max_new_tokens": 768,
	"max_time": null,
	"min_length": 0,
	"min_new_tokens": null,
	"no_repeat_ngram_size": 0,
	"num_beam_groups": 1,
	"num_beams": 1,
	"num_return_sequences": 1,
	"output_attentions": false,
	"output_hidden_states": false,
	"output_scores": false,
	"pad_token_id": null,
	"penalty_alpha": null,
	"remove_invalid_values": false,
	"renormalize_logits": true,
	"repetition_penalty": 1.0,
	"return_dict_in_generate": false,
	"semantic_infer_token": 129599,
	"semantic_pad_token": 10000,
	"semantic_rate_hz": 49.9,
	"semantic_vocab_size": 10000,
	"suppress_tokens": null,
	"temperature": 0.7,
	"text_encoding_offset": 10048,
	"text_pad_token": 129595,
	"top_k": 50,
	"top_p": 1.0,
	"transformers_version": "4.31.0.dev0",
	"typical_p": 1.0,
	"use_cache": true
	}
	}