Upload LlamaForCausalLM

#3
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openlm-research/open_llama_3b",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -13,11 +13,14 @@
13
  "model_type": "llama",
14
  "num_attention_heads": 32,
15
  "num_hidden_layers": 26,
 
16
  "pad_token_id": 0,
 
17
  "rms_norm_eps": 1e-06,
 
18
  "tie_word_embeddings": false,
19
  "torch_dtype": "float32",
20
- "transformers_version": "4.30.2",
21
  "use_cache": true,
22
  "vocab_size": 52000
23
  }
 
1
  {
2
+ "_name_or_path": "hf_output_final/",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
13
  "model_type": "llama",
14
  "num_attention_heads": 32,
15
  "num_hidden_layers": 26,
16
+ "num_key_value_heads": 32,
17
  "pad_token_id": 0,
18
+ "pretraining_tp": 1,
19
  "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
  "tie_word_embeddings": false,
22
  "torch_dtype": "float32",
23
+ "transformers_version": "4.31.0",
24
  "use_cache": true,
25
  "vocab_size": 52000
26
  }
generation_config.json CHANGED
@@ -3,5 +3,5 @@
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
- "transformers_version": "4.30.2"
7
  }
 
3
  "bos_token_id": 1,
4
  "eos_token_id": 2,
5
  "pad_token_id": 0,
6
+ "transformers_version": "4.31.0"
7
  }
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42617b1733f6f55a66328ff69e343cf4a70465b3e3776b87bdb4c886bd8fd663
3
- size 9972241191
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4991c881fc6493c14ddd4055a60b5e2b16d8bb2298026721cbdea2af933d3cf
3
+ size 9972237073
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be39d7611ece4274b33a7326fa6dd214815bd3d13cd189045385bb718b3fdcea
3
- size 4245748053
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f151a798ef2fb9613d50d6e3c19eb7887f7ebe493664af08acf32a212301fa89
3
+ size 4245745796
pytorch_model.bin.index.json CHANGED
@@ -106,9 +106,9 @@
106
  "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
107
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
108
  "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
109
- "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
110
  "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
111
- "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
112
  "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
113
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
114
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
 
106
  "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin",
107
  "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
108
  "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
109
+ "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
110
  "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
111
+ "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
112
  "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
113
  "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
114
  "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",