Kendamarron committed
Commit dc39238 · verified · 1 Parent(s): 5803481

Update README.md

Files changed (1):
  1. README.md (+49 -1)
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 library_name: transformers
-license: other
+license: apache-2.0
 base_model: llm-jp/llm-jp-3-3.7b-instruct
 tags:
 - llama-factory
@@ -9,6 +9,8 @@ tags:
 model-index:
 - name: sft
   results: []
+language:
+- ja
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -61,3 +63,49 @@ The following hyperparameters were used during training:
 - Pytorch 2.4.1+cu121
 - Datasets 3.1.0
 - Tokenizers 0.20.3
+
+### LLaMA-Factory yaml
+```
+### model
+model_name_or_path: llm-jp/llm-jp-3-3.7b-instruct
+
+### method
+stage: sft
+do_train: true
+finetuning_type: full
+deepspeed: examples/deepspeed/ds_z3_config.json
+
+### dataset
+dataset: cot_normal, cot_math
+template: alpaca_ja
+cutoff_len: 8192
+overwrite_cache: true
+preprocessing_num_workers: 16
+
+### output
+output_dir: saves/llm_jp/full/sft
+logging_steps: 10
+save_steps: 500
+plot_loss: true
+overwrite_output_dir: true
+
+### train
+per_device_train_batch_size: 8
+gradient_accumulation_steps: 4
+learning_rate: 1.0e-5
+num_train_epochs: 2.0
+lr_scheduler_type: cosine
+optim: adamw_bnb_8bit
+warmup_ratio: 0.1
+bf16: true
+ddp_timeout: 180000000
+
+### eval
+val_size: 0.01
+per_device_eval_batch_size: 1
+eval_strategy: steps
+eval_steps: 500
+
+### logging
+report_to: wandb
+```
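
Training with a configuration like the one added above is typically launched through LLaMA-Factory's CLI (e.g. `llamafactory-cli train <path-to-yaml>`), and the resulting checkpoint loads like any other `transformers` causal LM. The sketch below is only an illustration under stated assumptions: the repo id is a placeholder (the diff does not give the final Hub id), and it does not reproduce the `alpaca_ja` prompt template used during SFT.

```python
# Minimal inference sketch with transformers.
# NOTE: "Kendamarron/sft" is a placeholder repo id, not confirmed by this card,
# and the raw prompt below ignores the alpaca_ja template used for fine-tuning.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Kendamarron/sft"  # placeholder; replace with the actual model id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # training was run in bf16
    device_map="auto",
)

prompt = "日本語で自己紹介してください。"  # "Please introduce yourself in Japanese."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```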