possy_maskN10000T80 / t5marulog.jsonl
ace14459tv
拡散
17336a3
{"log": "trained", "date": "2023-05-08T11:23:05", "elapsed": "00:01:52", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:25:04", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:26:18", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:27:34", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:28:49", "elapsed": "00:01:07", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:30:02", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:31:17", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:32:34", "elapsed": "00:01:11", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:33:52", "elapsed": "00:01:12", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:35:13", "elapsed": "00:01:15", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:36:36", "elapsed": "00:01:11", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:37:55", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:39:11", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:40:28", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:41:47", "elapsed": "00:01:11", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:43:07", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:44:23", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:45:39", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:46:55", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:48:11", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:49:28", "elapsed": "00:01:12", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:50:47", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:52:04", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:53:21", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:54:38", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:55:55", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:57:11", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:58:28", "elapsed": "00:01:11", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T11:59:47", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:01:04", "elapsed": "00:01:11", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:02:21", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:03:38", "elapsed": "00:01:12", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:04:57", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:06:14", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:07:31", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:08:48", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:10:03", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:11:20", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:12:34", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:13:49", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:15:03", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:16:19", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:17:33", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:18:49", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:20:05", "elapsed": "00:01:08", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:21:19", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:22:35", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:23:50", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:25:05", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:26:21", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:27:37", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:28:52", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:30:08", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:31:23", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:32:39", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:33:55", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:35:11", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:36:27", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:37:44", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:38:59", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:40:15", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:41:30", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:42:46", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:44:02", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:45:18", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:46:35", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:47:51", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:49:07", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:50:23", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:51:38", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:52:56", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:54:11", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:55:27", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:56:43", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:57:59", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T12:59:15", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T13:00:32", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T13:01:47", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T13:03:03", "elapsed": "00:01:09", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}
{"log": "trained", "date": "2023-05-08T13:04:18", "elapsed": "00:01:10", "model": "maskN10000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 156, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 140, "saved": "maskN10000T80_new"}