winglian committed
Commit 5e06dba
1 Parent(s): f153e0f

add configs

Files changed (1)
  1. configs/dodona.yml +83 -0
configs/dodona.yml ADDED
@@ -0,0 +1,83 @@
+ base_model: openaccess-ai-collective/trophonius-15b
+ base_model_config: openaccess-ai-collective/trophonius-15b
+ load_in_8bit: false
+ load_in_4bit: false
+ gptq: false
+ strict: false
+ push_dataset_to_hub: winglian
+ hf_use_auth_token: true
+ datasets:
+   - path: openaccess-ai-collective/flan-cot-zs-noopt
+     data_files:
+       - cot_zs_noopt_train_augmented.gpt4.jsonl
+     type: alpaca_w_system
+   - path: winglian/evals
+     data_files:
+       - hf/arc-c-v2.jsonl
+       - hf/arc-e-v2.jsonl
+       - hf/riddle_sense-v2.jsonl
+     type: explainchoice:chat
+   - path: winglian/evals
+     data_files:
+       - hellaswag/hellaswag-v2.jsonl
+     type: explainchoice:chat
+     shards: 5
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len: 2048
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_modules:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+ wandb_project: dodona-15b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./dodona-15b
+ gradient_accumulation_steps: 1
+ micro_batch_size: 2
+ num_epochs: 4
+ # optimizer: adamw_bnb_8bit
+ optimizer: paged_adamw_8bit
+ adam_beta2: 0.95
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.00003
+ train_on_inputs: false
+ group_by_length: true
+ bf16: true
+ fp16: false
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 100
+ eval_steps: 67
+ save_steps: 201
+ load_best_model_at_end: false
+ debug:
+ deepspeed:
+ weight_decay: 0.0001
+ fsdp:
+   - full_shard
+   - auto_wrap
+ fsdp_config:
+   fsdp_offload_params: true
+   fsdp_transformer_layer_cls_to_wrap: GPTBigCodeBlock
+ special_tokens:
+   pad_token: "<|endoftext|>"
+   bos_token: "<|endoftext|>"
+   eos_token: "<|endoftext|>"
+   unk_token: "<|endoftext|>"
+
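
For reference, this reads as an axolotl-style training config for a full finetune: `adapter` and the `lora_*` fields are left blank, three JSONL datasets are mixed, and FSDP (full_shard + auto_wrap, wrapping `GPTBigCodeBlock`) shards the 15B model across devices. Below is a minimal sketch of sanity-checking the file before a run; it assumes only that the file is plain YAML (loaded with PyYAML), and every key name comes from the diff above, not from any tool's API:

```python
# Minimal sketch: load configs/dodona.yml and sanity-check a few fields.
# Assumes PyYAML is installed; key names are taken from the diff above.
import yaml

with open("configs/dodona.yml") as f:
    cfg = yaml.safe_load(f)

# Blank adapter/lora_* fields imply a full finetune, not a LoRA run.
assert not cfg.get("adapter"), "expected full finetune (adapter is blank)"

# bf16 and fp16 are mutually exclusive precision flags.
assert not (cfg.get("bf16") and cfg.get("fp16")), "conflicting precision flags"

# Per-device examples per optimizer step; the global batch additionally
# scales with the number of GPUs, which the config does not encode.
per_device = cfg["micro_batch_size"] * cfg["gradient_accumulation_steps"]
print(f"per-device batch per step: {per_device}")  # 2
print(f"datasets mixed: {len(cfg['datasets'])}")   # 3
```

How the config is launched is not part of this commit; axolotl-era configs like this were typically handed to an `accelerate launch` entry point, but that is an assumption about the surrounding tooling rather than anything the diff states.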