IlyaGusev committed on
Commit
27a57d3
1 Parent(s): 3c4b721

Training config

Browse files
Files changed (1) hide show
  1. training_config.json +53 -0
training_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "evaluation_strategy": "steps",
4
+ "per_device_train_batch_size": 2,
5
+ "per_device_eval_batch_size": 2,
6
+ "gradient_accumulation_steps": 16,
7
+ "eval_steps": 150,
8
+ "save_steps": 150,
9
+ "logging_steps": 5,
10
+ "learning_rate": 0.003,
11
+ "num_train_epochs": 5,
12
+ "lr_scheduler_type": "cosine",
13
+ "warmup_steps": 100,
14
+ "fp16": false,
15
+ "bf16": true,
16
+ "gradient_checkpointing": false,
17
+ "torch_compile": false,
18
+ "optim": "adamw_torch",
19
+ "half_precision_backend": "auto",
20
+ "fp16_opt_level": "O2"
21
+ },
22
+ "deepspeed": {
23
+ "bf16": {
24
+ "enabled": true
25
+ },
26
+ "optimizer": {
27
+ "type": "AdamW",
28
+ "params": {
29
+ "lr": "auto",
30
+ "betas": "auto",
31
+ "eps": "auto",
32
+ "weight_decay": "auto"
33
+ }
34
+ },
35
+ "zero_optimization": {
36
+ "stage": 2,
37
+ "offload_optimizer": {
38
+ "device": "cpu",
39
+ "pin_memory": true
40
+ },
41
+ "overlap_comm": true,
42
+ "round_robin_gradients": true
43
+ },
44
+ "train_batch_size": "auto",
45
+ "gradient_accumulation_steps": "auto"
46
+ },
47
+ "model_name": "ai-forever/FRED-T5-1.7B",
48
+ "templates_path": "ru_alpaca_seq2seq_template.json",
49
+ "model_type": "seq2seq",
50
+ "max_source_tokens_count": 512,
51
+ "max_target_tokens_count": 512
52
+ }
53
+