ruzToRuz committed
Commit 0076a1c
1 Parent(s): 4d9c968

Upload folder using huggingface_hub

checkpoints/llamastack/config.json CHANGED
@@ -1,22 +1,25 @@
 {
+  "_name_or_path": "/content/tmp-output/checkpoint-100",
   "architectures": [
-    "LlamaForCausalLM"
+    "GPTNeoXForCausalLM"
   ],
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "hidden_act": "silu",
-  "hidden_size": 768,
+  "bos_token_id": 0,
+  "eos_token_id": 0,
+  "hidden_act": "gelu",
+  "hidden_size": 1024,
   "initializer_range": 0.02,
-  "intermediate_size": 2048,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
   "max_position_embeddings": 2048,
-  "model_type": "llama",
-  "num_attention_heads": 6,
-  "num_hidden_layers": 12,
-  "pad_token_id": 0,
-  "rms_norm_eps": 1e-12,
+  "model_type": "gpt_neox",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 24,
+  "rotary_emb_base": 10000,
+  "rotary_pct": 0.25,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.28.1",
   "use_cache": true,
-  "vocab_size": 32000
+  "use_parallel_residual": true,
+  "vocab_size": 50304
 }
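This commit swaps the LLaMA-style config for a GPT-NeoX one: new architecture class, GELU activation, rotary-embedding settings, and a vocabulary padded up to 50304. A minimal sketch to sanity-check the new config, assuming transformers is installed and the checkpoint folder is checked out locally (the path is illustrative, not part of this commit):

    # Load the updated config and instantiate the architecture without weights.
    from transformers import AutoConfig, AutoModelForCausalLM

    config = AutoConfig.from_pretrained("checkpoints/llamastack")
    assert config.model_type == "gpt_neox"   # was "llama" before this commit
    assert config.num_hidden_layers == 24    # up from 12

    model = AutoModelForCausalLM.from_config(config)  # random init, no download
    print(f"{sum(p.numel() for p in model.parameters()):,} parameters")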
checkpoints/llamastack/generation_config.json CHANGED
@@ -1,7 +1,6 @@
 {
   "_from_model_config": true,
-  "bos_token_id": 1,
-  "eos_token_id": 2,
-  "pad_token_id": 0,
+  "bos_token_id": 0,
+  "eos_token_id": 0,
   "transformers_version": "4.28.1"
 }
checkpoints/llamastack/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93f92d3a8d52fd6b715a352b934f3969369b804f0e4cc46d5da977d781b02737
-size 1072940165
+oid sha256:a587e70bda43138e9cba5565397b405aa67743fc9eee211f200e5f8e2d45f696
+size 3242915589
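optimizer.pt, pytorch_model.bin, scheduler.pt, and training_args.bin are tracked with Git LFS, so the repository only stores three-line pointer files (version, oid, size); the diffs above and below show the pointer contents changing, with the optimizer state growing from ~1.07 GB to ~3.24 GB, consistent with the larger model. A sketch of how such a pointer could be parsed, assuming the file is checked out as a raw pointer rather than smudged to the full binary:

    # Parse a Git LFS pointer file (three "key value" text lines).
    def read_lfs_pointer(path):
        fields = dict(line.split(" ", 1) for line in open(path) if " " in line)
        return fields["oid"].strip(), int(fields["size"])

    oid, size = read_lfs_pointer("checkpoints/llamastack/optimizer.pt")
    print(oid, f"{size / 1e9:.2f} GB")  # ~3.24 GB after this commit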
checkpoints/llamastack/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d9ca9af1519d8c5fb41249543245cf0b5ea3c9e29109aa08e5cd52c87f48f8fb
-size 536467513
+oid sha256:a644d96f06192f422843011253faae7b4aee1d1223d4dcda4050aae3c7321994
+size 1722122365
checkpoints/llamastack/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2acb32cf621e325606613abb16a1d24390b0d9e26469bed6bcd2ab3ecb604ce
+oid sha256:87e8445c8c2999b5a0549408a323d0c0368170cc3a511490b5cc6caa80998e45
 size 627
checkpoints/llamastack/special_tokens_map.json CHANGED
@@ -1,24 +1,6 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|padding|>",
+  "unk_token": "<|endoftext|>"
 }
checkpoints/llamastack/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/llamastack/tokenizer_config.json CHANGED
@@ -1,33 +1,11 @@
 {
   "add_bos_token": true,
   "add_eos_token": true,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "clean_up_tokenization_spaces": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "add_prefix_space": false,
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
-  "sp_model_kwargs": {},
-  "tokenizer_class": "LlamaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "tokenizer_class": "GPTNeoXTokenizer",
+  "unk_token": "<|endoftext|>"
 }
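Together with special_tokens_map.json above, the tokenizer now follows the GPT-NeoX convention: <|endoftext|> doubles as bos, eos, and unk, and <|padding|> is the pad token. A quick check, assuming the checkpoint folder is local so AutoTokenizer picks up the newly added tokenizer.json:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoints/llamastack")
    print(tok.bos_token, tok.eos_token, tok.unk_token)  # all "<|endoftext|>"
    print(tok.pad_token)                                # "<|padding|>"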
checkpoints/llamastack/trainer_state.json CHANGED
@@ -1,40 +1,15 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.6,
-  "global_step": 2000,
+  "epoch": 1.5384615384615383,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.4,
-      "learning_rate": 8.689839572192514e-05,
-      "loss": 5.1589,
-      "step": 500
-    },
-    {
-      "epoch": 0.8,
-      "learning_rate": 7.352941176470589e-05,
-      "loss": 4.2823,
-      "step": 1000
-    },
-    {
-      "epoch": 1.2,
-      "learning_rate": 6.016042780748663e-05,
-      "loss": 3.8577,
-      "step": 1500
-    },
-    {
-      "epoch": 1.6,
-      "learning_rate": 4.679144385026738e-05,
-      "loss": 3.6414,
-      "step": 2000
-    }
-  ],
-  "max_steps": 3750,
+  "log_history": [],
+  "max_steps": 390,
   "num_train_epochs": 3,
-  "total_flos": 2003670839033856.0,
+  "total_flos": 554205263953920.0,
   "trial_name": null,
   "trial_params": null
 }
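The new trainer state is internally consistent: max_steps of 390 across num_train_epochs of 3 implies 130 optimizer steps per epoch, so global_step 200 lands at epoch 200/130 ≈ 1.5385, matching the recorded value:

    # Recompute the recorded epoch from the other trainer_state.json fields.
    max_steps, num_train_epochs, global_step = 390, 3, 200
    steps_per_epoch = max_steps / num_train_epochs   # 130.0
    print(global_step / steps_per_epoch)             # 1.5384615384615383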
checkpoints/llamastack/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e603106e28facc5d5aa6b8a66fa37f038002feb3de39d91214b784ab8683c4b
-size 3643
+oid sha256:d06eaf40be8bf84db14eee865477c551377f867367ee1b7cf0abf46a7439a0de
+size 3579