aws-neuron / optimum-neuron-cache

Model card | Files and versions | Community

optimum-neuron-cache / inference-cache-config / llama.json

dacorvo's picture

dacorvo HF staff

Added Llama-70b batch_size 4 to inference cache

593822e verified 5 months ago

No virus

2.3 kB

	{
	"meta-llama/Llama-2-7b-chat-hf": [
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 2,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 2,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 8,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 8,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 16,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 16,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	}
	],
	"meta-llama/Llama-2-13b-chat-hf": [
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 8,
	"sequence_length": 4096,
	"num_cores": 8,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 8,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	}
	],
	"meta-llama/Llama-2-70b-chat-hf": [
	{
	"batch_size": 1,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	},
	{
	"batch_size": 4,
	"sequence_length": 4096,
	"num_cores": 24,
	"auto_cast_type": "fp16"
	}
	]
	}