{ "model": "dBert", "dataset": "optimized_strict_heteronyms", "experiment": "train-dBert", "run": "test", "num_train_epochs": 10, "evaluation_strategy": "steps", "eval_steps": 300, "logging_strategy": "steps", "logging_steps": 300, "save_strategy": "steps", "save_steps": 300, "learning_rate": 2e-5, "per_device_train_batch_size": 128, "per_device_eval_batch_size": 128, "load_best_model_at_end": true, "metric_for_best_model": "loss", "weight_decay": 0.01, "save_total_limit": 5, "report_to": "mlflow" }