winglian committed
Commit e53f59f · 1 Parent(s): 6638474

ep1 packing v3

config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 3200,
+   "initializer_range": 0.02,
+   "intermediate_size": 8640,
+   "max_position_embeddings": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 26,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.31.0.dev0",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
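
For reference, the architecture this file describes can be instantiated with transformers. A minimal sketch, assuming the commit's files have been downloaded to a local directory ./open-orca-3b (the path is an assumption, not part of the commit):

from transformers import LlamaConfig

# Load the config committed above (local directory path is an assumption).
config = LlamaConfig.from_pretrained("./open-orca-3b")

# Approximate parameter count implied by the config: untied input/output
# embeddings plus 26 decoder layers (attention + SwiGLU MLP); norms ignored.
h, i = config.hidden_size, config.intermediate_size
n_layers, v = config.num_hidden_layers, config.vocab_size
approx_params = 2 * v * h + n_layers * (4 * h * h + 3 * h * i)
print(f"~{approx_params / 1e9:.2f}B parameters")  # ~3.43B

That count lines up with the 6,853,066,089-byte float16 checkpoint added below (roughly 2 bytes per parameter).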
configs/packing-2.yml ADDED
@@ -0,0 +1,80 @@
+ # base_model: meta-llama/Llama-2-7b-hf
+ # base_model_config: meta-llama/Llama-2-7b-hf
+ base_model: openlm-research/open_llama_3b_v2
+ base_model_config: openlm-research/open_llama_3b_v2
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ tokenizer_use_fast: false
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+ push_dataset_to_hub: openaccess-ai-collective
+ hf_use_auth_token: true
+ datasets:
+   - path: Open-Orca/oo-gpt4-200k
+     type: alpaca_w_system.load_open_orca
+ dataset_shard_num: 5
+ dataset_shard_idx: 0
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ sample_packing: true
+ sample_packing_eff_est: 0.98
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_modules:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+ wandb_project: packing-tests-3b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./open-orca-3b
+ gradient_accumulation_steps: 1
+ micro_batch_size: 4
+ num_epochs: 6
+ optimizer: adamw_torch
+ adam_beta2: 0.95
+ max_grad_norm: 1.0
+ torchdistx_path:
+ lr_scheduler: cosine
+ lr_quadratic_warmup: true
+ learning_rate: 0.0000045
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention:
+ sdp_attention: true
+ flash_optimum:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 10
+ eval_steps: 49
+ save_steps:
+ debug:
+ deepspeed:
+ weight_decay: 0.1
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_offload_params: true
+   fsdp_state_dict_type: FULL_STATE_DICT
+   fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
+
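
With sample_packing enabled, each micro batch of 4 sequences of length 2048 is expected to be about 98% real tokens (sample_packing_eff_est). A back-of-the-envelope sketch of the implied per-step throughput; the single-device assumption is illustrative, and axolotl's exact step accounting may differ:

# Values taken from configs/packing-2.yml above.
sequence_len = 2048
micro_batch_size = 4
gradient_accumulation_steps = 1
sample_packing_eff_est = 0.98  # estimated fill rate of each packed sequence

# Non-padding tokens per optimizer step on a single device (assumption).
tokens_per_step = (sequence_len * micro_batch_size
                   * gradient_accumulation_steps * sample_packing_eff_est)
print(f"~{tokens_per_step:,.0f} real tokens per step")  # ~8,028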
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "pad_token_id": 0,
+   "transformers_version": "4.31.0.dev0"
+ }
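
transformers picks these defaults up automatically at generation time; a minimal sketch that just confirms the committed values, again assuming a local ./open-orca-3b directory:

from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("./open-orca-3b")  # path is an assumption
assert gen_config.bos_token_id == 1
assert gen_config.eos_token_id == 2
assert gen_config.pad_token_id == 0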
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10150d89cbc8eee3fa245d3f0b3499e382232a27fb3d83263123f73292bda083
+ size 6853066089
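
This is a Git LFS pointer, not the weights themselves: it records the SHA-256 and byte size of the real blob, so a download can be verified locally. A minimal stdlib sketch, assuming the checkpoint has been fetched to pytorch_model.bin:

import hashlib
from pathlib import Path

# Expected values copied from the LFS pointer above.
EXPECTED_OID = "10150d89cbc8eee3fa245d3f0b3499e382232a27fb3d83263123f73292bda083"
EXPECTED_SIZE = 6853066089

path = Path("pytorch_model.bin")  # assumed download location
assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"

digest = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_OID, "checksum mismatch"
print("pytorch_model.bin matches its LFS pointer")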
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": {"content": "<s>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}, "eos_token": {"content": "</s>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}, "unk_token": {"content": "<unk>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}}
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:91b289e85fa20fd375d8b33dc12f77616f18abc6359804471d1fafcb425fecb8
+ size 511574
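
The tokenizer model is likewise an LFS pointer, here to a 511,574-byte SentencePiece file. Once downloaded, a quick cross-check against config.json's vocab_size (the local filename is an assumption):

import sentencepiece as spm

sp = spm.SentencePieceProcessor(model_file="tokenizer.model")  # assumed local path
print(sp.vocab_size())  # 32000, matching vocab_size in config.json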
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"add_bos_token": true, "add_eos_token": false, "model_max_length": 2048, "pad_token": null, "sp_model_kwargs": {}, "tokenizer_class": "LlamaTokenizer", "clean_up_tokenization_spaces": false, "bos_token": {"__type": "AddedToken", "content": "<s>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}, "eos_token": {"__type": "AddedToken", "content": "</s>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}, "unk_token": {"__type": "AddedToken", "content": "<unk>", "lstrip": false, "normalized": true, "rstrip": false, "single_word": false}}