ChallengerSpaceShuttle committed
Commit 7b0d31b
1 Parent(s): f353196

Upload folder using huggingface_hub

config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "architectures": [
+     "Gemma2ForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "attn_logit_softcapping": 50.0,
+   "bos_token_id": 2,
+   "cache_implementation": "hybrid",
+   "eos_token_id": 1,
+   "final_logit_softcapping": 30.0,
+   "head_dim": 256,
+   "hidden_act": "gelu_pytorch_tanh",
+   "hidden_activation": "gelu_pytorch_tanh",
+   "hidden_size": 2304,
+   "initializer_range": 0.02,
+   "intermediate_size": 9216,
+   "max_position_embeddings": 8192,
+   "model_type": "gemma2",
+   "num_attention_heads": 8,
+   "num_hidden_layers": 26,
+   "num_key_value_heads": 4,
+   "pad_token_id": 0,
+   "query_pre_attn_scalar": 256,
+   "rms_norm_eps": 1e-06,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "torch_dtype": "float32",
+   "transformers_version": "4.42.4",
+   "use_cache": true,
+   "vocab_size": 288256
+ }
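
The uploaded config.json describes a Gemma-2-2b architecture: 26 layers, hidden size 2304, 8 attention heads grouped over 4 key/value heads, head dimension 256, an 8192-token context with a 4096-token sliding window, and a vocabulary padded to 288,256 entries. A minimal sketch of inspecting and instantiating such a config with transformers (assuming transformers >= 4.42.4 and that the file sits in the current directory; no weights are loaded here, and the snippet is not part of the upload):

    from transformers import AutoConfig, AutoModelForCausalLM

    # Read ./config.json and build the described architecture with
    # randomly initialised weights (no checkpoint is loaded here).
    config = AutoConfig.from_pretrained(".")
    print(config.model_type, config.num_hidden_layers, config.hidden_size)  # gemma2 26 2304

    model = AutoModelForCausalLM.from_config(config)
    print(f"{sum(p.numel() for p in model.parameters()):,} parameters")
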
generation_config.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 2,
+   "cache_implementation": "hybrid",
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.42.4"
+ }
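
generation_config.json only pins the special-token ids (bos 2, eos 1, pad 0) and the hybrid KV cache that matches Gemma-2's alternating sliding-window/global attention. A short sketch of reading it back with transformers, assuming the file is in the current directory:

    from transformers import GenerationConfig

    # Read ./generation_config.json back into a GenerationConfig object.
    gen_cfg = GenerationConfig.from_pretrained(".")
    print(gen_cfg.bos_token_id, gen_cfg.eos_token_id, gen_cfg.pad_token_id)  # 2 1 0
    print(gen_cfg.cache_implementation)                                      # hybrid
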
hyperparameters.yaml ADDED
@@ -0,0 +1,33 @@
+ model_name: google/gemma-2-2b
+ out_dir: pretrained_model/models
+ precision: bf16-mixed
+ initial_checkpoint_dir: google/gemma-2-2b
+ resume: false
+ data:
+   class_path: litgpt.data.LitData
+   init_args:
+     data_path: data
+     seed: 42
+     num_workers: 8
+ train:
+   save_interval: 1000
+   log_interval: 1
+   global_batch_size: 4
+   micro_batch_size: 1
+   lr_warmup_steps: 2000
+   max_tokens: 156800708
+   max_seq_length: 2048
+   tie_embeddings: false
+   max_norm: 1.0
+   min_lr: 4.0e-05
+ eval:
+   interval: 1000
+   max_iters: 100
+   initial_validation: false
+   final_validation: true
+ optimizer: AdamW
+ devices: auto
+ num_nodes: 1
+ tokenizer_dir: google/gemma-2-2b
+ logger_name: tensorboard
+ seed: 42
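
hyperparameters.yaml is a litgpt pretraining configuration: continued pretraining from google/gemma-2-2b in bf16-mixed precision on a LitData dataset, with a global batch size of 4 built from micro-batches of 1, 2,000 warmup steps, a 2,048-token sequence length, and a budget of roughly 157M tokens; presumably it was consumed by something like `litgpt pretrain --config hyperparameters.yaml`. A small sketch that reads the file and derives a few quantities it implies (PyYAML only; reading global/micro as gradient accumulation assumes a single device, since devices is set to auto):

    import yaml

    # Read the litgpt pretraining config uploaded in this commit.
    with open("hyperparameters.yaml") as f:
        hp = yaml.safe_load(f)

    train = hp["train"]
    # Only equals gradient accumulation on a single GPU (assumption, devices: auto).
    grad_accum = train["global_batch_size"] // train["micro_batch_size"]    # 4
    tokens_per_step = train["global_batch_size"] * train["max_seq_length"]  # 4 * 2048 = 8192
    total_steps = train["max_tokens"] // tokens_per_step                    # ~19,140 optimizer steps
    print(grad_accum, tokens_per_step, total_steps)

At these numbers the run amounts to roughly 19,000 optimizer steps, so the 2,000 warmup steps cover about a tenth of training.
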
lit_model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3727b8adbc7efe76771ec1159e088d170f18710eb326f20f9ba771c58f3e4b61
+ size 26822532114
model.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:06e57db92ad9a9894fb8e6999c8a4e9cb9aee534e2decba89e5942237d058652
+ size 13411292690
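
lit_model.pth (≈26.8 GB) and model.pth (≈13.4 GB) are stored through Git LFS, so only the pointer files above (spec version, sha256 oid, byte size) live in the Git history; the tensors themselves are resolved from LFS storage on download. A sketch of fetching the real file with huggingface_hub, where the repo id is a placeholder rather than the actual repository:

    from huggingface_hub import hf_hub_download

    # "user/repo" is a placeholder, not the actual repository id of this upload.
    weights_path = hf_hub_download(repo_id="user/repo", filename="model.pth")
    print(weights_path)  # local cache path of the ~13.4 GB file
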
model_config.yaml ADDED
@@ -0,0 +1,35 @@
+ attention_logit_softcapping: 50.0
+ attention_scores_scalar: 256
+ bias: false
+ block_size: 8192
+ final_logit_softcapping: 30.0
+ gelu_approximate: tanh
+ head_size: 256
+ hf_config:
+   name: gemma-2-2b
+   org: google
+ intermediate_size: 9216
+ lm_head_bias: false
+ mlp_class_name: GemmaMLP
+ n_embd: 2304
+ n_expert: 0
+ n_expert_per_token: 0
+ n_head: 8
+ n_layer: 26
+ n_query_groups: 4
+ name: Gemma-2-2b
+ norm_class_name: RMSNorm
+ norm_eps: 1.0e-05
+ padded_vocab_size: 288256
+ padding_multiple: 512
+ parallel_residual: false
+ post_attention_norm: true
+ post_mlp_norm: true
+ rope_base: 10000
+ rope_condense_ratio: 1
+ rotary_percentage: 1.0
+ scale_embeddings: true
+ shared_attention_norm: false
+ sliding_window_layer_placing: 2
+ sliding_window_size: 4096
+ vocab_size: 288256
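
model_config.yaml is the litgpt-side description of the same network, using litgpt field names (n_embd, n_head, n_query_groups, ...) instead of the Hugging Face ones in config.json. A small cross-check sketch, assuming both files sit in the current directory:

    import json
    import yaml

    # Cross-check the litgpt description against the Hugging Face config.
    with open("config.json") as f:
        hf = json.load(f)
    with open("model_config.yaml") as f:
        lit = yaml.safe_load(f)

    pairs = {
        "n_embd": "hidden_size",
        "n_head": "num_attention_heads",
        "n_layer": "num_hidden_layers",
        "n_query_groups": "num_key_value_heads",
        "head_size": "head_dim",
        "intermediate_size": "intermediate_size",
        "padded_vocab_size": "vocab_size",
    }
    for lit_key, hf_key in pairs.items():
        print(lit_key, lit[lit_key], hf_key, hf[hf_key], lit[lit_key] == hf[hf_key])

Note that the uploaded files themselves disagree on the norm epsilon (norm_eps 1.0e-05 here versus rms_norm_eps 1e-06 in config.json).
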
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:593ff07d6c4160df5d12c5199d4b4d8ea02cac22bf797264db0246a523daea61
+ size 4838146
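
tokenizer.model (≈4.8 MB, also an LFS pointer) is the tokenizer shipped with the checkpoint; for Gemma-style checkpoints handled by litgpt this is normally a raw SentencePiece model, which is an assumption here. A sketch of loading it directly with the sentencepiece package:

    import sentencepiece as spm

    # Assumes tokenizer.model is a raw SentencePiece model, as is typical for
    # Gemma checkpoints; adjust if the file turns out to be something else.
    sp = spm.SentencePieceProcessor(model_file="tokenizer.model")
    print(sp.vocab_size())                         # piece count; padded_vocab_size may be larger
    print(sp.encode("Hello world", out_type=str))
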