aaditya committed on
Commit a82a711
1 Parent(s): cf64284

Create phi3-ft-fsdp.yml (#1580)

Rename to be FSDP-specific and tweak settings a bit

Files changed (1)
  1. examples/phi/phi3-ft-fsdp.yml +83 -0
examples/phi/phi3-ft-fsdp.yml ADDED
@@ -0,0 +1,83 @@
+ base_model: microsoft/Phi-3-mini-4k-instruct
+ model_type: AutoModelForCausalLM
+ tokenizer_type: AutoTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+ datasets:
+   - path: mhenrichsen/alpaca_2k_test
+     type: alpaca
+
+ dataset_prepared_path:
+ val_set_size: 0
+ output_dir: ./phi-sft-out
+
+ sequence_len: 4096
+ sample_packing: true
+ pad_to_sequence_len: true
+ trust_remote_code: true
+
+ adapter:
+ lora_model_dir:
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+
+ wandb_project: phi3
+ wandb_entity:
+ wandb_watch:
+ wandb_name:
+ wandb_log_model:
+
+ gradient_accumulation_steps: 2
+ micro_batch_size: 12
+ num_epochs: 2
+ optimizer: adamw_torch
+ adam_beta2: 0.95
+ adam_epsilon: 0.00001
+ max_grad_norm: 1.0
+ lr_scheduler: cosine
+ learning_rate: 0.000003
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: auto
+ fp16:
+ tf32: true
+
+ gradient_checkpointing: true
+ gradient_checkpointing_kwargs:
+   use_reentrant: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ warmup_steps: 100
+ evals_per_epoch: 4
+ saves_per_epoch: 1
+ debug:
+ deepspeed:
+ weight_decay: 0.1
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_limit_all_gathers: true
+   fsdp_sync_module_states: true
+   fsdp_offload_params: true
+   fsdp_use_orig_params: false
+   fsdp_cpu_ram_efficient_loading: true
+   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
+   fsdp_transformer_layer_cls_to_wrap: Phi3DecoderLayer
+   fsdp_state_dict_type: FULL_STATE_DICT
+   fsdp_sharding_strategy: FULL_SHARD
+ resize_token_embeddings_to_32x: true
+ special_tokens:
+   pad_token: "<|endoftext|>"
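
Usage note, not part of the commit itself: a minimal launch sketch, assuming axolotl and accelerate are installed and that the file lives at examples/phi/phi3-ft-fsdp.yml as added above. The process count below is illustrative; FSDP only shards across two or more ranks.

    # Example: fine-tune Phi-3 with FSDP on 2 GPUs via axolotl's CLI entry point
    accelerate launch --num_processes 2 -m axolotl.cli.train examples/phi/phi3-ft-fsdp.yml

The config pairs full_shard with fsdp_offload_params: true, trading step speed for memory headroom, and with the adapter fields left blank this runs as a full-parameter fine-tune rather than a LoRA job.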