PrunaAI
/

Muhammad2003-Llama3-8B-OpenHermes-DPO-bnb-4bit-smashed

Text Generation

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

Muhammad2003-Llama3-8B-OpenHermes-DPO-bnb-4bit-smashed / smash_config.json

sharpenb's picture

Upload folder using huggingface_hub (#1)

da77b21 verified 4 months ago

history blame contribute delete

1.04 kB

	{
	"api_key": null,
	"verify_url": "http://johnrachwan.pythonanywhere.com",
	"smash_config": {
	"pruners": "None",
	"pruning_ratio": 0.0,
	"factorizers": "None",
	"quantizers": "['llm-int8']",
	"weight_quantization_bits": 4,
	"output_deviation": 0.005,
	"compilers": "None",
	"static_batch": true,
	"static_shape": true,
	"controlnet": "None",
	"unet_dim": 4,
	"device": "cuda",
	"cache_dir": "/ceph/hdd/staff/charpent/.cache/modelsx0ij7nur",
	"batch_size": 1,
	"model_name": "Muhammad2003/Llama3-8B-OpenHermes-DPO",
	"task": "text_text_generation",
	"max_batch_size": 1,
	"qtype_weight": "torch.qint8",
	"qtype_activation": "torch.quint8",
	"qobserver": "<class 'torch.ao.quantization.observer.MinMaxObserver'>",
	"qscheme": "torch.per_tensor_symmetric",
	"qconfig": "x86",
	"group_size": 128,
	"damp_percent": 0.1,
	"save_load_fn": "bitsandbytes"
	}
	}