---
license: apache-2.0
---

wandb: https://wandb.ai/open-assistant/supervised-finetuning/runs/sytsyhrp

Datasets:

```
pretrain:
  num_train_epochs: 1
  weight_decay: 0.0
  use_custom_sampler: true
  sort_by_length: false
  datasets:
    - gpteacher_roleplay:
        val_split: 0.05
    - red_pajama:
        fraction: 0.25
        max_val_set: 1000
    - wizardlm_70k:
        val_split: 0.05
        max_val_set: 500
    - joke:
        val_split: 0.05
    - poem_instructions:
        val_split: 0.025
    - oa_stackexchange:
        val_split: 0.05
        fraction: 0.1
        max_val_set: 1000
    - tell_a_joke:
        val_split: 0.05
        max_val_set: 250
    - webgpt:
        val_split: 0.05
        max_val_set: 250
    - gpt4all:
        val_split: 0.01
        max_val_set: 1000
    - alpaca_gpt4:
        val_split: 0.025
        max_val_set: 250
    - code_alpaca:
        val_split: 0.05
        max_val_set: 250
    - vicuna:
        max_val_set: 250
    - oig_file:
        source_url: https://huggingface.co/datasets/laion/OIG/resolve/main/unified_chip2.jsonl
        max_count: 10000
        min_length: 250
        val_split: 0.05
        max_val_set: 250
    - minimath:
        val_split: 0.05
    - humaneval_mbpp_codegen_qa:
        val_split: 0.05
    - humaneval_mbpp_testgen_qa:
        val_split: 0.05
    - grade_school_math_instructions:
        val_split: 0.05
    - recipes:
        val_split: 0.05
    - cmu_wiki_qa:
        val_split: 0.05
    - oa_wiki_qa_bart_10000row:
        val_split: 0.05
        max_val_set: 250
    - prosocial_dialogue:
        fraction: 0.1
        max_val_set: 250
    - explain_prosocial:
        fraction: 0.075
        max_val_set: 250
    - soda:
        fraction: 0.25
        max_val_set: 1000
    - oa_leet10k:
        val_split: 0.05
        max_val_set: 250
    - dolly15k:
        val_split: 0.05
        max_val_set: 300
```
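
The `fraction`, `val_split`, and `max_val_set` keys control how each dataset is subsampled and split into train/validation parts. A minimal sketch of the assumed semantics (illustrative only, not the actual Open-Assistant trainer code; `subsample_and_split` is a hypothetical helper):

```
import random

def subsample_and_split(examples, fraction=1.0, val_split=0.0, max_val_set=None, seed=42):
    """Keep a random `fraction` of the examples, then carve off `val_split`
    of the remainder as a validation set, capped at `max_val_set`."""
    rng = random.Random(seed)
    data = list(examples)
    rng.shuffle(data)
    if fraction < 1.0:
        data = data[: int(len(data) * fraction)]
    n_val = int(len(data) * val_split)
    if max_val_set is not None:
        n_val = min(n_val, max_val_set)
    return data[n_val:], data[:n_val]  # (train, val)

# e.g. oa_stackexchange (fraction: 0.1, val_split: 0.05, max_val_set: 1000)
# applied to a hypothetical 100k-example corpus:
train, val = subsample_and_split(range(100_000), fraction=0.1, val_split=0.05, max_val_set=1000)
print(len(train), len(val))  # 9500 500
```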

Pythia:

```
pythia-12b-pretrain:
  dtype: fp16
  log_dir: "pythia_log_12b"
  learning_rate: 6e-6
  model_name: EleutherAI/pythia-12b-deduped
  output_dir: pythia_model_12b
  weight_decay: 0.0
  max_length: 2048
  warmup_steps: 100
  gradient_checkpointing: true
  gradient_accumulation_steps: 4
  per_device_train_batch_size: 4
  per_device_eval_batch_size: 4
  eval_steps: 251
  save_steps: 500
  num_train_epochs: 1
  save_total_limit: 2
  deepspeed_config: configs/zero_config_pretrain.json
```
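
With `per_device_train_batch_size: 4` and `gradient_accumulation_steps: 4`, each GPU contributes 4 × 4 = 16 sequences of up to 2048 tokens per optimizer step, so the global batch size is 16 × the number of DeepSpeed workers.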

Command used: `deepspeed trainer_sft.py --show_dataset_stats --configs defaults pythia-12b-pretrain pretrain --cache_dir .cache/ --output_dir .saved/pythia-12b-super-pretrain2 --deepspeed`
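
The `--configs defaults pythia-12b-pretrain pretrain` arguments stack the two named sections above on top of the trainer defaults. For trying out the resulting checkpoint, a hypothetical inference sketch with Transformers (requires `accelerate`): the repo id is a placeholder, and the `<|prompter|>`/`<|assistant|>` prompt format is an assumption borrowed from other Open-Assistant Pythia SFT models.

```
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder: replace with this checkpoint's actual Hub repo id.
REPO_ID = "your-org/pythia-12b-super-pretrain2"

tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
model = AutoModelForCausalLM.from_pretrained(REPO_ID, torch_dtype="auto", device_map="auto")

# Assumed Open-Assistant prompt format: <|prompter|> ... <|endoftext|><|assistant|>
prompt = "<|prompter|>What is a lambda function in Python?<|endoftext|><|assistant|>"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=256, do_sample=True, top_p=0.95)
print(tokenizer.decode(output[0]))
```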