Kendamarron committed on
Commit
a909317
1 Parent(s): 5194685

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -3
README.md CHANGED
@@ -1,3 +1,50 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ ```
5
+ ### model
6
+ model_name_or_path: Kendamarron/Width-Up-Scaled-llm-jp-3-2.3b
7
+
8
+ ### method
9
+ stage: pt
10
+ do_train: true
11
+ finetuning_type: full
12
+ enable_liger_kernel: true
13
+ flash_attn: fa2
14
+
15
+ ### dataset
16
+ dataset: abeja_test
17
+ cutoff_len: 4096
18
+ packing: true
19
+ overwrite_cache: true
20
+ preprocessing_num_workers: 64
21
+
22
+ ### output
23
+ output_dir: saves/llm-jp/full/cpt/
24
+ logging_steps: 1
25
+ save_steps: 500
26
+ plot_loss: true
27
+ overwrite_output_dir: true
28
+
29
+ ### train
30
+ per_device_train_batch_size: 16
31
+ gradient_accumulation_steps: 4
32
+ learning_rate: 1.0e-4
33
+ num_train_epochs: 1.0
34
+ lr_scheduler_type: constant_with_warmup
35
+ adam_beta1: 0.9
36
+ adam_beta2: 0.95
37
+ optim: adamw_bnb_8bit
38
+ warmup_steps: 500
39
+ bf16: true
40
+ ddp_timeout: 180000000
41
+
42
+ ### eval
43
+ val_size: 1000
44
+ per_device_eval_batch_size: 2
45
+ eval_strategy: steps
46
+ eval_steps: 500
47
+
48
+ ### logging
49
+ report_to: wandb
50
+ ```