{
"version": "0.1.0",
"model_type": "llama",
"quantization": "q4f16_1",
"model_config": {
"hidden_size": 4096,
"intermediate_size": 14336,
"num_attention_heads": 32,
"num_hidden_layers": 32,
"rms_norm_eps": 1e-05,
"vocab_size": 128256,
"position_embedding_base": 500000.0,
"context_window_size": 8192,
"prefill_chunk_size": 2048,
"num_key_value_heads": 8,
"head_dim": 128,
"tensor_parallel_shards": 1,
"max_batch_size": 80
},
"vocab_size": 128256,
"context_window_size": 8192,
"sliding_window_size": -1,
"prefill_chunk_size": 2048,
"attention_sink_size": -1,
"tensor_parallel_shards": 1,
"temperature": 0.6,
"presence_penalty": 0.0,
"frequency_penalty": 0.0,
"repetition_penalty": 1.0,
"top_p": 0.9,
"tokenizer_files": [
"tokenizer.json",
"tokenizer_config.json"
],
"tokenizer_info": {
"token_postproc_method": "byte_level",
"prepend_space_in_encode": false,
"strip_space_in_decode": false
},
"conv_template": {
"name": "custom",
"system_template": "System: {system_message}\n\n",
"system_message": "This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context.",
"system_prefix_token_ids": [
128000
],
"add_role_after_system_message": true,
"roles": {
"user": "User",
"assistant": "Assistant"
},
"role_templates": {
"user": "{user_message}",
"assistant": "{assistant_message}",
"tool": "{tool_message}"
},
"messages": [],
"seps": [
"\n\n"
],
"role_content_sep": ": ",
"role_empty_sep": ":",
"stop_str": [],
"stop_token_ids": [
128001,
128009
],
"function_string": "",
"use_function_calling": false
},
"pad_token_id": 0,
"bos_token_id": 128000,
"eos_token_id": [
128001,
128009
]
}