{ "_name_or_path": "de-coder/UlizaLlama_Q4_K_M-gguf", "architectures": ["LlamaForCausalLM"], "model_type": "llama", "torch_dtype": "float16", "transformers_version": "4.34.0", "vocab_size": 20000, "hidden_size": 4096, "intermediate_size": 11008, "num_hidden_layers": 32, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "max_position_embeddings": 4096, "initializer_range": 0.02, "rms_norm_eps": 1e-05, "rope_theta": 10000.0, "rope_scaling": null, "attention_bias": false, "use_cache": true, "tie_word_embeddings": false, "bos_token_id": 1, "eos_token_id": 2, "pad_token_id": 0, "unk_token_id": 0, "quantization": { "method": "GGUF", "bits": 4, "variant": "q4_0", "block_size": 32, "group_size": 32, "tensors": { "query": "Q4_0", "key": "Q4_0", "value": "Q4_0", "output": "Q4_0", "intermediate": "Q4_0", "gate": "Q4_0", "embedding": "Q4_0", "norm": "F16" }, "scales_dtype": "fp16" }, "tokenizer": { "type": "BPE", "vocab_file": "ulizallama-tokenizer.json", "merges_file": "ulizallama-merges.txt" }, "generation": { "temperature": 0.7, "top_p": 0.95, "top_k": 40, "repetition_penalty": 1.1, "max_new_tokens": 512 }, "base_model": { "name": "Jacaranda/kiswallama-pretrained", "type": "Llama2 Continual Pretraining" }, "quantization_process": { "library": "llama.cpp", "version": "1.0.0", "command": "llama.cpp quantize ulizallama-7b.gguf ulizallama-7b-gguf-q4_0.bin q4_0" } }