kingabzpro committed
Commit 686b9e6
1 Parent(s): 3ce7bb8

Upload LlamaForCausalLM

README.md CHANGED
@@ -1,10 +1,10 @@
 ---
-library_name: transformers
+language:
+- en
 license: apache-2.0
+library_name: transformers
 datasets:
 - Trelis/orpo-dpo-mix-40k-SHORT
-language:
-- en
 pipeline_tag: text-classification
 ---
 
config.json CHANGED
@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B",
+  "_name_or_path": "/kaggle/input/llama-3/transformers/8b-hf/1",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 128256,
-  "eos_token_id": 128257,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
@@ -17,7 +17,6 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
   "num_key_value_heads": 8,
-  "pad_token_id": 128257,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
   "rope_scaling": null,
@@ -26,5 +25,5 @@
   "torch_dtype": "float16",
   "transformers_version": "4.41.0",
   "use_cache": true,
-  "vocab_size": 128258
+  "vocab_size": 128256
 }
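This change re-points `_name_or_path` at the Kaggle copy of the base weights and drops the extra pad token (id 128257) that had been appended during fine-tuning, restoring the stock Llama 3 8B special-token ids and vocabulary size. A minimal sketch, assuming the committed `config.json` is in the working directory, to confirm the updated fields:

```python
# Minimal sketch: read the committed config.json and check the fields this
# commit changes (the values simply restate the diff above).
import json

with open("config.json") as f:
    cfg = json.load(f)

assert cfg["bos_token_id"] == 128000   # was 128256 before this commit
assert cfg["eos_token_id"] == 128001   # was 128257
assert cfg["vocab_size"] == 128256     # was 128258 (extra pad token removed)
assert "pad_token_id" not in cfg       # the pad_token_id entry was dropped
print("config.json matches the stock Llama 3 8B token ids")
```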
generation_config.json CHANGED
@@ -1,10 +1,6 @@
 {
-  "bos_token_id": 128256,
-  "do_sample": true,
-  "eos_token_id": 128257,
-  "max_length": 4096,
-  "pad_token_id": 128257,
-  "temperature": 0.6,
-  "top_p": 0.9,
+  "_from_model_config": true,
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
   "transformers_version": "4.41.0"
 }
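With `do_sample`, `temperature`, `top_p`, `max_length`, and the pad token removed from `generation_config.json`, the file now carries only the token ids inherited from the model config (`_from_model_config: true`), so sampling settings must be supplied at generation time. A hedged sketch, assuming a local checkout of this repository, that reproduces the deleted defaults per call:

```python
# Minimal sketch: the sampling defaults deleted in this commit are now passed
# explicitly to generate(); "." stands in for a local checkout of this repo.
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(".")
model = AutoModelForCausalLM.from_pretrained(".")

inputs = tokenizer("Hello,", return_tensors="pt")
outputs = model.generate(
    **inputs,
    do_sample=True,
    temperature=0.6,   # formerly a default in generation_config.json
    top_p=0.9,         # formerly a default
    max_length=4096,   # formerly a default
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```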
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90b105009f6cd276891d0da58573f932960a0ce3b155d9ac5f3d857017ec6ec8
-size 4976714976
+oid sha256:b6948f1132949c27c365b689f126fef076fe4182f553b8c0399639e0135241f4
+size 4976698592
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:614751c97e105793b5b9d5cc5a15f23c2c4b00a8cc7b0656ef60b47893a842c8
+oid sha256:414dede41da3a92b825a85b6ee22119d538c20305258f5c6e724626793293179
 size 4999802616
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dd469a74e79763a438c2597f6c53a9b5c80d011bd58e20b636d914677ce1533
+oid sha256:a15f0b0b0857d4017400881f4241d51835ac6c0d622db305c026954141d11faa
 size 4915916080
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ac146a09043bcb032f655a3cacb56793eec649fd64ff5e219aa906968e7f141b
-size 1168155192
+oid sha256:350ac0df99121df394b7795107dbe8e4e1beb0ae066b6f8a89e9c504f6553dab
+size 1168138808
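Each `.safetensors` entry above is a Git LFS pointer (`version`, `oid`, `size`); the commit replaces all four shards, so every `oid` changes, and shards 1 and 4 also shrink slightly because the two extra vocabulary rows were removed. A minimal sketch, assuming the shards have been downloaded next to the pointers, for verifying a shard against its pointer:

```python
# Minimal sketch: recompute a shard's sha256 and byte size and compare them
# with the Git LFS pointer values shown in this commit.
import hashlib
import os

def verify_shard(path: str, expected_oid: str, expected_size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
            digest.update(chunk)
    return digest.hexdigest() == expected_oid and os.path.getsize(path) == expected_size

print(verify_shard(
    "model-00001-of-00004.safetensors",
    "b6948f1132949c27c365b689f126fef076fe4182f553b8c0399639e0135241f4",
    4976698592,
))
```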
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 16060555264
+    "total_size": 16060522496
   },
   "weight_map": {
     "lm_head.weight": "model-00004-of-00004.safetensors",