{
"version": "0.1.0",
"model_type": "llama",
"quantization": "q4f16_1",
"model_config": {
"hidden_size": 4096,
"intermediate_size": 14336,
"num_attention_heads": 32,
"num_hidden_layers": 32,
"rms_norm_eps": 1e-05,
"vocab_size": 128256,
"position_embedding_base": 500000.0,
"context_window_size": 8192,
"prefill_chunk_size": 2048,
"num_key_value_heads": 8,
"head_dim": 128,
"tensor_parallel_shards": 1,
"max_batch_size": 80
},
"vocab_size": 128256,
"context_window_size": 8192,
"sliding_window_size": -1,
"prefill_chunk_size": 2048,
"attention_sink_size": -1,
"tensor_parallel_shards": 1,
"temperature": 0.6,
"presence_penalty": 0.0,
"frequency_penalty": 0.0,
"repetition_penalty": 1.0,
"top_p": 0.9,
"tokenizer_files": [
"tokenizer.json",
"tokenizer_config.json"
],
"tokenizer_info": {
"token_postproc_method": "byte_level",
"prepend_space_in_encode": false,
"strip_space_in_decode": false
},
"conv_template": {
"name": "custom",
"system_template": "System: {system_message}\n\n",
"system_message": "This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context.",
"system_prefix_token_ids": [
128000
],
"add_role_after_system_message": true,
"roles": {
"user": "User",
"assistant": "Assistant"
},
"role_templates": {
"user": "{user_message}",
"assistant": "{assistant_message}",
"tool": "{tool_message}"
},
"messages": [],
"seps": [
"\n\n"
],
"role_content_sep": ": ",
"role_empty_sep": ":",
"stop_str": [],
"stop_token_ids": [
128001,
128009
],
"function_string": "",
"use_function_calling": false
},
"pad_token_id": 0,
"bos_token_id": 128000,
"eos_token_id": [
128001,
128009
]
}