|
{"log": "trained", "date": "2023-05-08T11:19:14", "elapsed": "00:01:39", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:21:01", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:22:03", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:23:05", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:24:07", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:25:08", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:26:10", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:27:11", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:28:12", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:29:15", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:30:17", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:31:19", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:32:20", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:33:21", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:34:23", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:35:24", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:36:25", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:37:27", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:38:28", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:39:29", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:40:31", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:41:33", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:42:37", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:43:38", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:44:40", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:45:41", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:46:42", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:47:44", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:48:45", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:49:46", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:50:48", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:51:49", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:52:51", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:53:54", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:54:56", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:55:57", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:56:58", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:57:59", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T11:59:02", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:00:04", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:01:05", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:02:07", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:03:09", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:04:11", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:05:14", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:06:17", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:07:20", "elapsed": "00:00:54", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:08:21", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:09:22", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:10:24", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:11:26", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:12:29", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:13:31", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:14:33", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:15:35", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:16:37", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:17:40", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:18:41", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:19:43", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:20:45", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:21:47", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:22:48", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:23:50", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:24:52", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:25:54", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:26:57", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:27:59", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:29:01", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:30:04", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:31:06", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:32:07", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:33:09", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:34:10", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:35:15", "elapsed": "00:00:57", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:36:21", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:37:24", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:38:27", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:39:30", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:40:32", "elapsed": "00:00:55", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
{"log": "trained", "date": "2023-05-08T12:41:34", "elapsed": "00:00:56", "model": "maskN1000T80_new", "max_length": 128, "target_max_length": 128, "batch_size": 64, "gradient_accumulation_steps": 1, "train_steps": 15, "accelerator": "gpu", "devices": "auto", "precision": "bf16", "strategy": "auto", "gradient_clip_val": 1.0, "compile": true, "solver": "adamw", "lr": 0.0003, "warmup_steps": 1, "training_steps": 100000, "adam_epsilon": 1e-08, "weight_decay": 0.0, "epoch": 1, "step": 14, "saved": "maskN1000T80_new"} |
|
|