andreaskoepf committed on
Commit
3bd5bef
1 Parent(s): 578e911

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +62 -0
README.md CHANGED
@@ -1,3 +1,65 @@
1
  ---
2
  license: apache-2.0
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
  ---
4
+ - wandb (internal): https://wandb.ai/open-assistant/supervised-finetuning/runs/tlevhltw
5
+ - checkpoint: 2000 steps (~2.9 epochs)
6
+
7
+ Model:
8
+ ```
9
+ falcon-7b:
10
+ dtype: bf16
11
+ log_dir: "falcon_log_7b"
12
+ learning_rate: 1e-5
13
+ model_name: "tiiuae/falcon-7b"
14
+ deepspeed_config: configs/zero_config.json
15
+ output_dir: falcon
16
+ weight_decay: 0.0
17
+ max_length: 2048
18
+ warmup_steps: 20
19
+ gradient_checkpointing: true
20
+ gradient_accumulation_steps: 4
21
+ per_device_train_batch_size: 4
22
+ per_device_eval_batch_size: 8
23
+ eval_steps: 100
24
+ save_steps: 500
25
+ save_strategy: steps
26
+ num_train_epochs: 8
27
+ save_total_limit: 4
28
+ residual_dropout: 0.2
29
+ residual_dropout_lima: true
30
+ ```
31
+
32
+ Dataset:
33
+ ```
34
+ sft9-stage2:
35
+ # oasst_export: 100.00% (29899)
36
+ # vicuna: 50.00% (16963)
37
+ # code_alpaca: 50.00% (9510)
38
+ # oa_wiki_qa_bart_10000row: 100.00% (9434)
39
+ # grade_school_math_instructions: 100.00% (8351)
40
+ # dolly15k: 100.00% (14250)
41
+
42
+ use_custom_sampler: true
43
+ datasets:
44
+ - oasst_export:
45
+ lang: "bg,ca,cs,da,de,en,es,fr,hr,hu,it,nl,pl,pt,ro,ru,sl,sr,sv,uk" # sft-8.0
46
+ input_file_path: 2023-06-02_oasst_all_labels.jsonl.gz
47
+ val_split: 0.05
48
+ top_k: 2
49
+ - vicuna:
50
+ fraction: 0.5
51
+ val_split: 0.025
52
+ max_val_set: 250
53
+ - code_alpaca:
54
+ fraction: 0.5
55
+ val_split: 0.05
56
+ max_val_set: 250
57
+ - oa_wiki_qa_bart_10000row:
58
+ val_split: 0.05
59
+ max_val_set: 250
60
+ - grade_school_math_instructions:
61
+ val_split: 0.05
62
+ - dolly15k:
63
+ val_split: 0.05
64
+ max_val_set: 300
65
+ ```