IlyaGusev
/

fred_t5_ru_turbo_alpaca

Text Generation

text2text-generation

text-generation-inference

Model card Files Files and versions Community

IlyaGusev committed on Apr 14, 2023

Commit

27a57d3

•

1 Parent(s): 3c4b721

Training config

Files changed (1) hide show

training_config.json +53 -0

training_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+    "trainer": {
+        "evaluation_strategy": "steps",
+        "per_device_train_batch_size": 2,
+        "per_device_eval_batch_size": 2,
+        "gradient_accumulation_steps": 16,
+        "eval_steps": 150,
+        "save_steps": 150,
+        "logging_steps": 5,
+        "learning_rate": 0.003,
+        "num_train_epochs": 5,
+        "lr_scheduler_type": "cosine",
+        "warmup_steps": 100,
+        "fp16": false,
+        "bf16": true,
+        "gradient_checkpointing": false,
+        "torch_compile": false,
+        "optim": "adamw_torch",
+        "half_precision_backend": "auto",
+        "fp16_opt_level": "O2"
+    },
+    "deepspeed": {
+        "bf16": {
+            "enabled": true
+        },
+        "optimizer": {
+            "type": "AdamW",
+            "params": {
+                "lr": "auto",
+                "betas": "auto",
+                "eps": "auto",
+                "weight_decay": "auto"
+            }
+        },
+        "zero_optimization": {
+            "stage": 2,
+            "offload_optimizer": {
+                "device": "cpu",
+                "pin_memory": true
+            },
+            "overlap_comm": true,
+            "round_robin_gradients": true
+        },
+        "train_batch_size": "auto",
+        "gradient_accumulation_steps": "auto"
+    },
+    "model_name": "ai-forever/FRED-T5-1.7B",
+    "templates_path": "ru_alpaca_seq2seq_template.json",
+    "model_type": "seq2seq",
+    "max_source_tokens_count": 512,
+    "max_target_tokens_count": 512
+}