{ "trainer": { "evaluation_strategy": "steps", "per_device_train_batch_size": 2, "per_device_eval_batch_size": 2, "gradient_accumulation_steps": 16, "eval_steps": 150, "save_steps": 150, "logging_steps": 5, "learning_rate": 0.003, "num_train_epochs": 5, "lr_scheduler_type": "cosine", "warmup_steps": 100, "fp16": false, "bf16": true, "gradient_checkpointing": false, "torch_compile": false, "optim": "adamw_torch", "half_precision_backend": "auto", "fp16_opt_level": "O2" }, "deepspeed": { "bf16": { "enabled": true }, "optimizer": { "type": "AdamW", "params": { "lr": "auto", "betas": "auto", "eps": "auto", "weight_decay": "auto" } }, "zero_optimization": { "stage": 2, "offload_optimizer": { "device": "cpu", "pin_memory": true }, "overlap_comm": true, "round_robin_gradients": true }, "train_batch_size": "auto", "gradient_accumulation_steps": "auto" }, "model_name": "ai-forever/FRED-T5-1.7B", "templates_path": "ru_alpaca_seq2seq_template.json", "model_type": "seq2seq", "max_source_tokens_count": 512, "max_target_tokens_count": 512 }