first commit
- config.json +5 -5
- quant_config.json +1 -1
- quantized_weights.pt +1 -1
config.json
CHANGED
@@ -1,10 +1,10 @@
 {
-  "_name_or_path": "/
+  "_name_or_path": "huggyllama/llama-7b",
   "architectures": [
-    "
+    "LlamaForCausalLM"
   ],
-  "bos_token_id":
-  "eos_token_id":
+  "bos_token_id": 1,
+  "eos_token_id": 2,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -14,7 +14,7 @@
   "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
-  "pad_token_id":
+  "pad_token_id": 0,
   "rms_norm_eps": 1e-06,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
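For reference, a minimal sketch (not part of this commit) of checking the fields filled in above, assuming the transformers library is installed and the repository has been downloaded into the current directory:

import json
from transformers import AutoConfig

# Load config.json from the local checkout of this repository.
config = AutoConfig.from_pretrained(".")

# Values set by this commit.
assert config.model_type == "llama"
assert config.architectures == ["LlamaForCausalLM"]
assert (config.bos_token_id, config.eos_token_id, config.pad_token_id) == (1, 2, 0)
print(config.hidden_size, config.num_hidden_layers)  # 4096 32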
quant_config.json
CHANGED
@@ -7,5 +7,5 @@
   "percdamp": 0.01,
   "seed": 0,
   "nearest": false,
-  "save": "
+  "save": "./llama-7b-4bit"
 }
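These keys mirror the options of the GPTQ reference implementation: percdamp is the percent of the average Hessian diagonal added as dampening, and nearest selects a plain round-to-nearest baseline instead of GPTQ updates. A hedged sketch of reading the settings back; the key names come from this file, not from a documented schema:

import json

# Read the quantization settings recorded alongside the weights.
with open("quant_config.json") as f:
    quant_config = json.load(f)

assert quant_config["percdamp"] == 0.01  # dampening for the Hessian inverse
assert quant_config["nearest"] is False  # GPTQ updates, not round-to-nearest
print("quantized checkpoint written to:", quant_config["save"])  # ./llama-7b-4bit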
quantized_weights.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:66d8d284a5d786e489a4b3cda35510fa376c31005a67d4e5c92fe0d6596b5770
 size 4020703149
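The weights themselves live in Git LFS; only this pointer file is versioned in the commit. A minimal sketch for verifying a downloaded copy against the pointer's oid and size (both values copied from the pointer above):

import hashlib
import os

EXPECTED_OID = "66d8d284a5d786e489a4b3cda35510fa376c31005a67d4e5c92fe0d6596b5770"
EXPECTED_SIZE = 4020703149

path = "quantized_weights.pt"
assert os.path.getsize(path) == EXPECTED_SIZE

# Hash the ~4 GB file in 1 MiB chunks to keep memory use flat.
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED_OID
print("LFS object verified")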