diff --git a/.gitattributes b/.gitattributes index 94c24d8df7f82a89be957fca947782500153f132..cb6f4355b425557abd0ef6a1c07f5a239270cc0d 100644 --- a/.gitattributes +++ b/.gitattributes @@ -41,3 +41,8 @@ lr_sweep/hnet_xl_code_lr_5e-5/wandb/run-20260425_180603-5xd22ofy/run-5xd22ofy.wa lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/run-3z5g26qd.wandb filter=lfs diff=lfs merge=lfs -text lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_184822-bhvwo83l/run-bhvwo83l.wandb filter=lfs diff=lfs merge=lfs -text lr_sweep/pythia_1b_lr_5e-5/wandb/run-20260425_193045-vg3if73m/run-vg3if73m.wandb filter=lfs diff=lfs merge=lfs -text +lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/run-i6lt8av0.wandb filter=lfs diff=lfs merge=lfs -text +lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/run-d5usyud5.wandb filter=lfs diff=lfs merge=lfs -text +lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb filter=lfs diff=lfs merge=lfs -text +lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/run-ln6tfunh.wandb filter=lfs diff=lfs merge=lfs -text +lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/model_final.pt b/lr_sweep/hnet_xl_code_lr_1e-4/model_final.pt new file mode 100644 index 0000000000000000000000000000000000000000..3445e0d9d24a5fff0e9b70a7587e9e03d440814b --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/model_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5507a5a2456bffdc961775096b62d8f9bebb9dd301fc350b5878213252b348e7 +size 3315165484 diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/code/code_completion_exp/train_hnet/train.py b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/code/code_completion_exp/train_hnet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7c306fe6e62d718f1815d106471a779b413a20 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/code/code_completion_exp/train_hnet/train.py @@ -0,0 +1,284 @@ +""" +Training Pipeline для HNet модели на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=hnet_small + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +from pathlib import Path + +import torch +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# HNet imports +from hnet.load_utils import load_from_pretrained, load_from_config +from hnet.utils.tokenizers import ByteTokenizer +from hnet.utils.train import group_params + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, finish_tracking +from training_lib.hnet.train_loop import train_epoch +from training_lib.hnet.data import create_dataloaders + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Глав��ая функция тренировки с поддержкой DDP чере�� Accelerate.""" + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message( + f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", + cfg, + accelerator, + ) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message( + "HNet Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator + ) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = ByteTokenizer() + + # === Model === + log_message("Loading model...", cfg, accelerator) + if cfg.model.checkpoint_path: + model = load_from_pretrained( + model_path=cfg.model.checkpoint_path, + model_config_path=cfg.model.config_path, + ) + log_message(f"Loaded pretrained: {cfg.model.checkpoint_path}", cfg, accelerator) + else: + model = load_from_config( + model_config_path=cfg.model.config_path, + device="cpu", + ) + model.init_weights() + log_message("Initialized from scratch", cfg, accelerator) + + model.train() + + # LR multiplier для разны�� стадий (до prepare!) + lr_multiplier = list(cfg.training.lr_multiplier) + model.apply_lr_multiplier(lr_multiplier) + log_message(f"Applied LR multipliers: {lr_multiplier}", cfg, accelerator) + + # Warmup для Triton kernels + if cfg.training.warmup_model: + log_message("Warming up model...", cfg, accelerator) + model = model.to(accelerator.device) + model.warmup(verbose=accelerator.is_main_process) + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message( + f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator + ) + log_message( + f"Train batches per epoch (before DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + if val_dataloader: + log_message( + f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator + ) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model) + + for group in param_groups: + if "lr" not in group: + group["lr"] = cfg.training.lr + else: + group["lr"] = cfg.training.lr * group.get("lr_multiplier", 1.0) + if "weight_decay" not in group: + group["weight_decay"] = cfg.training.weight_decay + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil(len(train_dataloader) / accelerator.num_processes) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator, + ) + + # === Accelerate Prepare === + log_message( + "Preparing model, optimizer, and dataloaders with Accelerate...", + cfg, + accelerator, + ) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = ( + accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message( + f"Train batches per epoch (after DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, + optimizer, + scheduler, + cfg.training.resume_checkpoint, + cfg, + accelerator, + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + accelerator.end_training() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/config.yaml b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..e50e8bd8884a688edbf1a1299bf5bbd19f31e3c8 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/config.yaml @@ -0,0 +1,167 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + code_path: code/code_completion_exp/train_hnet/train.py + python_version: 3.12.0 + cli_version: 0.24.0 + framework: huggingface + huggingface_version: 4.57.6 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1777147642 + t: + 1: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 3: + - 2 + - 13 + - 16 + - 37 + - 42 + - 61 + 4: 3.12.0 + 5: 0.24.0 + 6: 4.57.6 + 13: linux-x86_64 + e: + yd4im4gytbm7o9yud168kac4xfyaj2kg: + os: Linux-5.4.0-176-generic-x86_64-with-glibc2.35 + python: CPython 3.12.0 + started_at: '2026-04-25T20:07:22.253382Z' + args: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=hnet_xl_code_lr_1e-4 + - training.lr=1e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_1e-4 + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + program: /workspace/byte-llms-code/code_completion_exp/train_hnet/train.py + code_path: code_completion_exp/train_hnet/train.py + code_path_local: train.py + git: + remote_url: https://github.com/naryst/byte-llms-code.git + commit: f111e13281aa0dc58e24302edab5b0d5c2024586 + email: nikita@local.ru + root: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_1e-4 + host: 7504e518d24a + executable: /venv/bytellm/bin/python + cpu_count: 64 + cpu_count_logical: 128 + gpu_type: NVIDIA H100 80GB HBM3 + gpu_count: 4 + disk: + /: + total: '265214230528' + used: '104071081984' + memory: + total: '1081679683584' + gpu_nvidia: + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-b60cdcab-2033-2009-41de-be646c953a20 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9982b420-4520-4238-c378-ec5a46015474 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134 + cuda_version: '12.2' + writer_id: yd4im4gytbm7o9yud168kac4xfyaj2kg +model: + desc: null + value: + config_path: /workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json + checkpoint_path: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +training: + desc: null + value: + epochs: 1 + batch_size: 4 + eval_batch_size: 24 + gradient_accumulation_steps: 4 + lr: 0.0001 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + lr_multiplier: + - 2.0 + - 1.5 + - 1.0 + load_balancing_weight: 0.01 + load_balancing_N: 4.0 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null + warmup_model: true +data: + desc: null + value: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 0 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + desc: null + value: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + desc: null + value: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: hnet_xl_code_lr_1e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_1e-4 +paths: + desc: null + value: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_1e-4 +seed: + desc: null + value: 42 +device: + desc: null + value: cuda diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/output.log b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..e488e6e090c6f36c8e9da177900f34fa4f3c07eb --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/output.log @@ -0,0 +1,1045 @@ +[2026-04-25 20:07:23] Initializing tokenizer... +[2026-04-25 20:07:23] Loading model... +[2026-04-25 20:07:28] Loaded pretrained: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +[2026-04-25 20:07:28] Applied LR multipliers: [2.0, 1.5, 1.0] +[2026-04-25 20:07:28] Warming up model... +[WARMUP] Starting warmup (compiling Triton kernels)... +[WARMUP] Forward: 17.237s, Backward: 26.466s +[WARMUP] Warmup complete. Subsequent passes will be fast. +[2026-04-25 20:08:11] Total params: 1,654,090,112 +[2026-04-25 20:08:11] Trainable params: 1,654,090,112 +[2026-04-25 20:08:11] Creating dataloaders... +[2026-04-25 20:08:11] Train dataset size: 316397 +[2026-04-25 20:08:11] Train batches per epoch (before DDP split): 79100 +[2026-04-25 20:08:11] Validation dataset size: 2000 +[2026-04-25 20:08:11] Validation batches: 84 +[2026-04-25 20:08:11] Creating optimizer... +[2026-04-25 20:08:11] Total steps: 9887, Steps per epoch: 39550 +[2026-04-25 20:08:11] Preparing model, optimizer, and dataloaders with Accelerate... +[2026-04-25 20:08:12] Train batches per epoch (after DDP split): 39550 +[2026-04-25 20:08:12] Starting training... +[2026-04-25 20:08:12] +============================================================ +[2026-04-25 20:08:12] EPOCH 1/1 +[2026-04-25 20:08:12] ============================================================ +[2026-04-25 20:09:07] Epoch 1 | Step 10 | Loss: 0.6473 | LM: 0.6116 | LB: 1.1577 | CL0: 2.9 | CL1: 2.1 | HR0: 0.351/SR0: 0.351 | HR1: 0.476/SR1: 0.455 | LR: 1.18e-05 +[2026-04-25 20:09:15] Epoch 1 | Step 20 | Loss: 0.6433 | LM: 0.6306 | LB: 1.1554 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.475/SR1: 0.455 | LR: 1.36e-05 +[2026-04-25 20:09:22] Epoch 1 | Step 30 | Loss: 0.6173 | LM: 0.5970 | LB: 1.1526 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.473/SR1: 0.452 | LR: 1.55e-05 +[2026-04-25 20:09:29] Epoch 1 | Step 40 | Loss: 0.6019 | LM: 0.5722 | LB: 1.1608 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.478/SR1: 0.456 | LR: 1.73e-05 +[2026-04-25 20:09:36] Epoch 1 | Step 50 | Loss: 0.5685 | LM: 0.5288 | LB: 1.1597 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.476/SR1: 0.456 | LR: 1.91e-05 +[2026-04-25 20:09:43] Epoch 1 | Step 60 | Loss: 0.5394 | LM: 0.5020 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.458 | LR: 2.09e-05 +[2026-04-25 20:09:50] Epoch 1 | Step 70 | Loss: 0.5170 | LM: 0.4868 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.458 | LR: 2.28e-05 +[2026-04-25 20:09:57] Epoch 1 | Step 80 | Loss: 0.5018 | LM: 0.4809 | LB: 1.1636 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.459 | LR: 2.46e-05 +[2026-04-25 20:10:05] Epoch 1 | Step 90 | Loss: 0.4886 | LM: 0.4675 | LB: 1.1645 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 2.64e-05 +[2026-04-25 20:10:12] Epoch 1 | Step 100 | Loss: 0.4758 | LM: 0.4668 | LB: 1.1644 | CL0: 2.8 | CL1: 2.1 | HR0: 0.360/SR0: 0.357 | HR1: 0.479/SR1: 0.458 | LR: 2.82e-05 +[2026-04-25 20:10:19] Epoch 1 | Step 110 | Loss: 0.4698 | LM: 0.4682 | LB: 1.1654 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 3.00e-05 +[2026-04-25 20:10:26] Epoch 1 | Step 120 | Loss: 0.4621 | LM: 0.4535 | LB: 1.1648 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.481/SR1: 0.459 | LR: 3.19e-05 +[2026-04-25 20:10:33] Epoch 1 | Step 130 | Loss: 0.4549 | LM: 0.4460 | LB: 1.1648 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 3.37e-05 +[2026-04-25 20:10:40] Epoch 1 | Step 140 | Loss: 0.4481 | LM: 0.4439 | LB: 1.1656 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 3.55e-05 +[2026-04-25 20:10:48] Epoch 1 | Step 150 | Loss: 0.4388 | LM: 0.4282 | LB: 1.1653 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.481/SR1: 0.460 | LR: 3.73e-05 +[2026-04-25 20:10:55] Epoch 1 | Step 160 | Loss: 0.4315 | LM: 0.4208 | LB: 1.1661 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.481/SR1: 0.460 | LR: 3.91e-05 +[2026-04-25 20:11:02] Epoch 1 | Step 170 | Loss: 0.4291 | LM: 0.4253 | LB: 1.1639 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.459 | LR: 4.10e-05 +[2026-04-25 20:11:09] Epoch 1 | Step 180 | Loss: 0.4218 | LM: 0.4186 | LB: 1.1631 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.458 | LR: 4.28e-05 +[2026-04-25 20:11:16] Epoch 1 | Step 190 | Loss: 0.4183 | LM: 0.4144 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.478/SR1: 0.457 | LR: 4.46e-05 +[2026-04-25 20:11:23] Epoch 1 | Step 200 | Loss: 0.4145 | LM: 0.4155 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.457 | LR: 4.64e-05 +[2026-04-25 20:11:30] Epoch 1 | Step 210 | Loss: 0.4124 | LM: 0.4163 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.479/SR1: 0.457 | LR: 4.83e-05 +[2026-04-25 20:11:37] Epoch 1 | Step 220 | Loss: 0.4084 | LM: 0.4153 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.458 | LR: 5.01e-05 +[2026-04-25 20:11:45] Epoch 1 | Step 230 | Loss: 0.4030 | LM: 0.4072 | LB: 1.1625 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 5.19e-05 +[2026-04-25 20:11:52] Epoch 1 | Step 240 | Loss: 0.3996 | LM: 0.4011 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 5.37e-05 +[2026-04-25 20:11:59] Epoch 1 | Step 250 | Loss: 0.3963 | LM: 0.3946 | LB: 1.1623 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 5.55e-05 +[2026-04-25 20:12:06] Epoch 1 | Step 260 | Loss: 0.3946 | LM: 0.3958 | LB: 1.1625 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.481/SR1: 0.459 | LR: 5.74e-05 +[2026-04-25 20:12:13] Epoch 1 | Step 270 | Loss: 0.3909 | LM: 0.3947 | LB: 1.1617 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.480/SR1: 0.458 | LR: 5.92e-05 +[2026-04-25 20:12:20] Epoch 1 | Step 280 | Loss: 0.3871 | LM: 0.3950 | LB: 1.1619 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.480/SR1: 0.458 | LR: 6.10e-05 +[2026-04-25 20:12:27] Epoch 1 | Step 290 | Loss: 0.3842 | LM: 0.3933 | LB: 1.1620 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 6.28e-05 +[2026-04-25 20:12:35] Epoch 1 | Step 300 | Loss: 0.3825 | LM: 0.3919 | LB: 1.1625 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 6.47e-05 +[2026-04-25 20:12:42] Epoch 1 | Step 310 | Loss: 0.3802 | LM: 0.3890 | LB: 1.1620 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.457 | LR: 6.65e-05 +[2026-04-25 20:12:49] Epoch 1 | Step 320 | Loss: 0.3769 | LM: 0.3854 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 6.83e-05 +[2026-04-25 20:12:56] Epoch 1 | Step 330 | Loss: 0.3743 | LM: 0.3833 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 7.01e-05 +[2026-04-25 20:13:03] Epoch 1 | Step 340 | Loss: 0.3716 | LM: 0.3805 | LB: 1.1623 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 7.19e-05 +[2026-04-25 20:13:10] Epoch 1 | Step 350 | Loss: 0.3699 | LM: 0.3788 | LB: 1.1624 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.457 | LR: 7.38e-05 +[2026-04-25 20:13:17] Epoch 1 | Step 360 | Loss: 0.3668 | LM: 0.3751 | LB: 1.1620 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 7.56e-05 +[2026-04-25 20:13:25] Epoch 1 | Step 370 | Loss: 0.3640 | LM: 0.3730 | LB: 1.1615 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 7.74e-05 +[2026-04-25 20:13:32] Epoch 1 | Step 380 | Loss: 0.3615 | LM: 0.3715 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 7.92e-05 +[2026-04-25 20:13:39] Epoch 1 | Step 390 | Loss: 0.3600 | LM: 0.3693 | LB: 1.1617 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 8.11e-05 +[2026-04-25 20:13:46] Epoch 1 | Step 400 | Loss: 0.3588 | LM: 0.3682 | LB: 1.1610 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 8.29e-05 +[2026-04-25 20:13:53] Epoch 1 | Step 410 | Loss: 0.3572 | LM: 0.3675 | LB: 1.1608 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 8.47e-05 +[2026-04-25 20:14:00] Epoch 1 | Step 420 | Loss: 0.3553 | LM: 0.3662 | LB: 1.1609 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 8.65e-05 +[2026-04-25 20:14:08] Epoch 1 | Step 430 | Loss: 0.3547 | LM: 0.3639 | LB: 1.1606 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 8.83e-05 +[2026-04-25 20:14:15] Epoch 1 | Step 440 | Loss: 0.3523 | LM: 0.3617 | LB: 1.1603 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.478/SR1: 0.456 | LR: 9.02e-05 +[2026-04-25 20:14:22] Epoch 1 | Step 450 | Loss: 0.3513 | LM: 0.3607 | LB: 1.1599 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.455 | LR: 9.20e-05 +[2026-04-25 20:14:29] Epoch 1 | Step 460 | Loss: 0.3502 | LM: 0.3589 | LB: 1.1599 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.455 | LR: 9.38e-05 +[2026-04-25 20:14:36] Epoch 1 | Step 470 | Loss: 0.3484 | LM: 0.3569 | LB: 1.1596 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 9.56e-05 +[2026-04-25 20:14:43] Epoch 1 | Step 480 | Loss: 0.3471 | LM: 0.3548 | LB: 1.1596 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 9.74e-05 +[2026-04-25 20:14:50] Epoch 1 | Step 490 | Loss: 0.3457 | LM: 0.3528 | LB: 1.1595 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.477/SR1: 0.455 | LR: 9.93e-05 +[2026-04-25 20:14:57] Epoch 1 | Step 500 | Loss: 0.3445 | LM: 0.3500 | LB: 1.1592 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 1.00e-04 +[2026-04-25 20:15:05] Epoch 1 | Step 510 | Loss: 0.3429 | LM: 0.3469 | LB: 1.1590 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.454 | LR: 1.00e-04 +[2026-04-25 20:15:12] Epoch 1 | Step 520 | Loss: 0.3425 | LM: 0.3470 | LB: 1.1585 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.476/SR1: 0.454 | LR: 1.00e-04 +[2026-04-25 20:15:19] Epoch 1 | Step 530 | Loss: 0.3415 | LM: 0.3463 | LB: 1.1581 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.476/SR1: 0.454 | LR: 1.00e-04 +[2026-04-25 20:15:26] Epoch 1 | Step 540 | Loss: 0.3401 | LM: 0.3454 | LB: 1.1577 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.476/SR1: 0.454 | LR: 1.00e-04 +[2026-04-25 20:15:33] Epoch 1 | Step 550 | Loss: 0.3386 | LM: 0.3437 | LB: 1.1574 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.476/SR1: 0.453 | LR: 1.00e-04 +[2026-04-25 20:15:40] Epoch 1 | Step 560 | Loss: 0.3378 | LM: 0.3413 | LB: 1.1570 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.475/SR1: 0.453 | LR: 1.00e-04 +[2026-04-25 20:15:47] Epoch 1 | Step 570 | Loss: 0.3370 | LM: 0.3405 | LB: 1.1570 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.475/SR1: 0.453 | LR: 1.00e-04 +[2026-04-25 20:15:55] Epoch 1 | Step 580 | Loss: 0.3362 | LM: 0.3395 | LB: 1.1566 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.475/SR1: 0.453 | LR: 1.00e-04 +[2026-04-25 20:16:02] Epoch 1 | Step 590 | Loss: 0.3357 | LM: 0.3389 | LB: 1.1562 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.475/SR1: 0.452 | LR: 1.00e-04 +[2026-04-25 20:16:09] Epoch 1 | Step 600 | Loss: 0.3346 | LM: 0.3370 | LB: 1.1556 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.474/SR1: 0.452 | LR: 1.00e-04 +[2026-04-25 20:16:16] Epoch 1 | Step 610 | Loss: 0.3346 | LM: 0.3358 | LB: 1.1554 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.474/SR1: 0.452 | LR: 1.00e-04 +[2026-04-25 20:16:23] Epoch 1 | Step 620 | Loss: 0.3342 | LM: 0.3353 | LB: 1.1552 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.474/SR1: 0.452 | LR: 1.00e-04 +[2026-04-25 20:16:30] Epoch 1 | Step 630 | Loss: 0.3335 | LM: 0.3354 | LB: 1.1550 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.474/SR1: 0.451 | LR: 1.00e-04 +[2026-04-25 20:16:38] Epoch 1 | Step 640 | Loss: 0.3332 | LM: 0.3342 | LB: 1.1547 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.473/SR1: 0.451 | LR: 1.00e-04 +[2026-04-25 20:16:45] Epoch 1 | Step 650 | Loss: 0.3324 | LM: 0.3333 | LB: 1.1543 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.473/SR1: 0.451 | LR: 1.00e-04 +[2026-04-25 20:16:52] Epoch 1 | Step 660 | Loss: 0.3323 | LM: 0.3324 | LB: 1.1540 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.473/SR1: 0.450 | LR: 1.00e-04 +[2026-04-25 20:16:59] Epoch 1 | Step 670 | Loss: 0.3316 | LM: 0.3314 | LB: 1.1538 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.472/SR1: 0.450 | LR: 1.00e-04 +[2026-04-25 20:17:06] Epoch 1 | Step 680 | Loss: 0.3311 | LM: 0.3315 | LB: 1.1539 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.473/SR1: 0.450 | LR: 1.00e-04 +[2026-04-25 20:17:13] Epoch 1 | Step 690 | Loss: 0.3306 | LM: 0.3318 | LB: 1.1534 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.472/SR1: 0.450 | LR: 1.00e-04 +[2026-04-25 20:17:20] Epoch 1 | Step 700 | Loss: 0.3306 | LM: 0.3333 | LB: 1.1531 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.472/SR1: 0.450 | LR: 1.00e-04 +[2026-04-25 20:17:28] Epoch 1 | Step 710 | Loss: 0.3297 | LM: 0.3334 | LB: 1.1529 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.472/SR1: 0.449 | LR: 1.00e-04 +[2026-04-25 20:17:35] Epoch 1 | Step 720 | Loss: 0.3292 | LM: 0.3331 | LB: 1.1525 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.471/SR1: 0.449 | LR: 1.00e-04 +[2026-04-25 20:17:42] Epoch 1 | Step 730 | Loss: 0.3288 | LM: 0.3325 | LB: 1.1523 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.471/SR1: 0.449 | LR: 1.00e-04 +[2026-04-25 20:17:49] Epoch 1 | Step 740 | Loss: 0.3282 | LM: 0.3319 | LB: 1.1520 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.471/SR1: 0.449 | LR: 1.00e-04 +[2026-04-25 20:17:56] Epoch 1 | Step 750 | Loss: 0.3275 | LM: 0.3309 | LB: 1.1514 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.470/SR1: 0.448 | LR: 1.00e-04 +[2026-04-25 20:18:03] Epoch 1 | Step 760 | Loss: 0.3277 | LM: 0.3302 | LB: 1.1511 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.470/SR1: 0.448 | LR: 1.00e-04 +[2026-04-25 20:18:10] Epoch 1 | Step 770 | Loss: 0.3278 | LM: 0.3300 | LB: 1.1509 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.470/SR1: 0.448 | LR: 1.00e-04 +[2026-04-25 20:18:18] Epoch 1 | Step 780 | Loss: 0.3272 | LM: 0.3291 | LB: 1.1505 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.470/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:18:25] Epoch 1 | Step 790 | Loss: 0.3266 | LM: 0.3285 | LB: 1.1503 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.470/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:18:32] Epoch 1 | Step 800 | Loss: 0.3257 | LM: 0.3270 | LB: 1.1500 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.470/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:18:39] Epoch 1 | Step 810 | Loss: 0.3254 | LM: 0.3271 | LB: 1.1499 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:18:46] Epoch 1 | Step 820 | Loss: 0.3250 | LM: 0.3265 | LB: 1.1497 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:18:53] Epoch 1 | Step 830 | Loss: 0.3249 | LM: 0.3268 | LB: 1.1494 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.447 | LR: 1.00e-04 +[2026-04-25 20:19:00] Epoch 1 | Step 840 | Loss: 0.3246 | LM: 0.3259 | LB: 1.1493 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.446 | LR: 1.00e-04 +[2026-04-25 20:19:08] Epoch 1 | Step 850 | Loss: 0.3239 | LM: 0.3254 | LB: 1.1491 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.446 | LR: 1.00e-04 +[2026-04-25 20:19:15] Epoch 1 | Step 860 | Loss: 0.3236 | LM: 0.3242 | LB: 1.1489 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.469/SR1: 0.446 | LR: 1.00e-04 +[2026-04-25 20:19:22] Epoch 1 | Step 870 | Loss: 0.3237 | LM: 0.3239 | LB: 1.1487 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.468/SR1: 0.446 | LR: 1.00e-04 +[2026-04-25 20:19:29] Epoch 1 | Step 880 | Loss: 0.3235 | LM: 0.3239 | LB: 1.1486 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.446 | LR: 1.00e-04 +[2026-04-25 20:19:36] Epoch 1 | Step 890 | Loss: 0.3232 | LM: 0.3233 | LB: 1.1484 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.445 | LR: 1.00e-04 +[2026-04-25 20:19:43] Epoch 1 | Step 900 | Loss: 0.3230 | LM: 0.3227 | LB: 1.1485 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.445 | LR: 1.00e-04 +[2026-04-25 20:19:50] Epoch 1 | Step 910 | Loss: 0.3230 | LM: 0.3221 | LB: 1.1481 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.445 | LR: 1.00e-04 +[2026-04-25 20:19:58] Epoch 1 | Step 920 | Loss: 0.3232 | LM: 0.3221 | LB: 1.1480 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.445 | LR: 1.00e-04 +[2026-04-25 20:20:05] Epoch 1 | Step 930 | Loss: 0.3229 | LM: 0.3213 | LB: 1.1477 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.445 | LR: 1.00e-04 +[2026-04-25 20:20:12] Epoch 1 | Step 940 | Loss: 0.3229 | LM: 0.3209 | LB: 1.1475 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.444 | LR: 1.00e-04 +[2026-04-25 20:20:19] Epoch 1 | Step 950 | Loss: 0.3222 | LM: 0.3206 | LB: 1.1474 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.444 | LR: 1.00e-04 +[2026-04-25 20:20:26] Epoch 1 | Step 960 | Loss: 0.3216 | LM: 0.3208 | LB: 1.1472 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.444 | LR: 1.00e-04 +[2026-04-25 20:20:33] Epoch 1 | Step 970 | Loss: 0.3213 | LM: 0.3211 | LB: 1.1469 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.444 | LR: 1.00e-04 +[2026-04-25 20:20:41] Epoch 1 | Step 980 | Loss: 0.3207 | LM: 0.3198 | LB: 1.1468 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.444 | LR: 1.00e-04 +[2026-04-25 20:20:48] Epoch 1 | Step 990 | Loss: 0.3201 | LM: 0.3195 | LB: 1.1466 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.443 | LR: 1.00e-04 +[2026-04-25 20:20:55] Epoch 1 | Step 1000 | Loss: 0.3197 | LM: 0.3186 | LB: 1.1462 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.443 | LR: 1.00e-04 +[2026-04-25 20:21:02] Epoch 1 | Step 1010 | Loss: 0.3198 | LM: 0.3188 | LB: 1.1460 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.443 | LR: 1.00e-04 +[2026-04-25 20:21:09] Epoch 1 | Step 1020 | Loss: 0.3197 | LM: 0.3190 | LB: 1.1458 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.443 | LR: 1.00e-04 +[2026-04-25 20:21:16] Epoch 1 | Step 1030 | Loss: 0.3196 | LM: 0.3189 | LB: 1.1455 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.443 | LR: 1.00e-04 +[2026-04-25 20:21:23] Epoch 1 | Step 1040 | Loss: 0.3190 | LM: 0.3182 | LB: 1.1453 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:21:30] Epoch 1 | Step 1050 | Loss: 0.3186 | LM: 0.3178 | LB: 1.1451 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:21:37] Epoch 1 | Step 1060 | Loss: 0.3178 | LM: 0.3168 | LB: 1.1450 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:21:45] Epoch 1 | Step 1070 | Loss: 0.3175 | LM: 0.3160 | LB: 1.1448 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:21:52] Epoch 1 | Step 1080 | Loss: 0.3177 | LM: 0.3150 | LB: 1.1448 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:21:59] Epoch 1 | Step 1090 | Loss: 0.3181 | LM: 0.3155 | LB: 1.1447 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:22:06] Epoch 1 | Step 1100 | Loss: 0.3178 | LM: 0.3155 | LB: 1.1446 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.442 | LR: 1.00e-04 +[2026-04-25 20:22:13] Epoch 1 | Step 1110 | Loss: 0.3178 | LM: 0.3148 | LB: 1.1445 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.441 | LR: 1.00e-04 +[2026-04-25 20:22:20] Epoch 1 | Step 1120 | Loss: 0.3179 | LM: 0.3146 | LB: 1.1444 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.441 | LR: 1.00e-04 +[2026-04-25 20:22:27] Epoch 1 | Step 1130 | Loss: 0.3180 | LM: 0.3139 | LB: 1.1441 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.441 | LR: 1.00e-04 +[2026-04-25 20:22:34] Epoch 1 | Step 1140 | Loss: 0.3178 | LM: 0.3136 | LB: 1.1438 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.441 | LR: 1.00e-04 +[2026-04-25 20:22:42] Epoch 1 | Step 1150 | Loss: 0.3173 | LM: 0.3129 | LB: 1.1437 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.441 | LR: 1.00e-04 +[2026-04-25 20:22:49] Epoch 1 | Step 1160 | Loss: 0.3173 | LM: 0.3131 | LB: 1.1434 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.440 | LR: 1.00e-04 +[2026-04-25 20:22:56] Epoch 1 | Step 1170 | Loss: 0.3174 | LM: 0.3127 | LB: 1.1432 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.440 | LR: 1.00e-04 +[2026-04-25 20:23:03] Epoch 1 | Step 1180 | Loss: 0.3173 | LM: 0.3127 | LB: 1.1429 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.440 | LR: 1.00e-04 +[2026-04-25 20:23:10] Epoch 1 | Step 1190 | Loss: 0.3172 | LM: 0.3126 | LB: 1.1428 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.440 | LR: 1.00e-04 +[2026-04-25 20:23:17] Epoch 1 | Step 1200 | Loss: 0.3168 | LM: 0.3117 | LB: 1.1426 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.440 | LR: 1.00e-04 +[2026-04-25 20:23:24] Epoch 1 | Step 1210 | Loss: 0.3165 | LM: 0.3116 | LB: 1.1425 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:23:32] Epoch 1 | Step 1220 | Loss: 0.3159 | LM: 0.3107 | LB: 1.1424 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:23:39] Epoch 1 | Step 1230 | Loss: 0.3159 | LM: 0.3110 | LB: 1.1423 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:23:46] Epoch 1 | Step 1240 | Loss: 0.3159 | LM: 0.3111 | LB: 1.1421 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:23:53] Epoch 1 | Step 1250 | Loss: 0.3156 | LM: 0.3112 | LB: 1.1420 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:24:00] Epoch 1 | Step 1260 | Loss: 0.3152 | LM: 0.3111 | LB: 1.1418 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:24:07] Epoch 1 | Step 1270 | Loss: 0.3148 | LM: 0.3107 | LB: 1.1418 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:24:14] Epoch 1 | Step 1280 | Loss: 0.3147 | LM: 0.3103 | LB: 1.1416 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.439 | LR: 1.00e-04 +[2026-04-25 20:24:22] Epoch 1 | Step 1290 | Loss: 0.3145 | LM: 0.3105 | LB: 1.1415 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:24:29] Epoch 1 | Step 1300 | Loss: 0.3142 | LM: 0.3103 | LB: 1.1415 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:24:36] Epoch 1 | Step 1310 | Loss: 0.3139 | LM: 0.3097 | LB: 1.1412 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:24:43] Epoch 1 | Step 1320 | Loss: 0.3139 | LM: 0.3092 | LB: 1.1410 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:24:50] Epoch 1 | Step 1330 | Loss: 0.3136 | LM: 0.3089 | LB: 1.1408 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:24:57] Epoch 1 | Step 1340 | Loss: 0.3136 | LM: 0.3090 | LB: 1.1406 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:25:04] Epoch 1 | Step 1350 | Loss: 0.3136 | LM: 0.3093 | LB: 1.1408 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:25:11] Epoch 1 | Step 1360 | Loss: 0.3135 | LM: 0.3085 | LB: 1.1408 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:25:18] Epoch 1 | Step 1370 | Loss: 0.3133 | LM: 0.3082 | LB: 1.1406 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.438 | LR: 1.00e-04 +[2026-04-25 20:25:26] Epoch 1 | Step 1380 | Loss: 0.3135 | LM: 0.3080 | LB: 1.1404 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:25:33] Epoch 1 | Step 1390 | Loss: 0.3139 | LM: 0.3083 | LB: 1.1401 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:25:40] Epoch 1 | Step 1400 | Loss: 0.3138 | LM: 0.3080 | LB: 1.1398 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:25:47] Epoch 1 | Step 1410 | Loss: 0.3134 | LM: 0.3069 | LB: 1.1397 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:25:54] Epoch 1 | Step 1420 | Loss: 0.3132 | LM: 0.3068 | LB: 1.1397 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:26:02] Epoch 1 | Step 1430 | Loss: 0.3130 | LM: 0.3067 | LB: 1.1396 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.437 | LR: 1.00e-04 +[2026-04-25 20:26:09] Epoch 1 | Step 1440 | Loss: 0.3127 | LM: 0.3064 | LB: 1.1395 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:16] Epoch 1 | Step 1450 | Loss: 0.3126 | LM: 0.3060 | LB: 1.1393 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:23] Epoch 1 | Step 1460 | Loss: 0.3122 | LM: 0.3055 | LB: 1.1392 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:30] Epoch 1 | Step 1470 | Loss: 0.3122 | LM: 0.3058 | LB: 1.1392 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:37] Epoch 1 | Step 1480 | Loss: 0.3120 | LM: 0.3054 | LB: 1.1390 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:44] Epoch 1 | Step 1490 | Loss: 0.3120 | LM: 0.3053 | LB: 1.1390 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:51] Epoch 1 | Step 1500 | Loss: 0.3118 | LM: 0.3050 | LB: 1.1388 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:26:59] Epoch 1 | Step 1510 | Loss: 0.3117 | LM: 0.3051 | LB: 1.1388 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:27:06] Epoch 1 | Step 1520 | Loss: 0.3116 | LM: 0.3048 | LB: 1.1388 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.436 | LR: 1.00e-04 +[2026-04-25 20:27:13] Epoch 1 | Step 1530 | Loss: 0.3114 | LM: 0.3046 | LB: 1.1386 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:20] Epoch 1 | Step 1540 | Loss: 0.3115 | LM: 0.3049 | LB: 1.1386 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:27] Epoch 1 | Step 1550 | Loss: 0.3114 | LM: 0.3046 | LB: 1.1385 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:34] Epoch 1 | Step 1560 | Loss: 0.3112 | LM: 0.3045 | LB: 1.1384 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:41] Epoch 1 | Step 1570 | Loss: 0.3113 | LM: 0.3046 | LB: 1.1383 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:48] Epoch 1 | Step 1580 | Loss: 0.3112 | LM: 0.3049 | LB: 1.1382 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:27:55] Epoch 1 | Step 1590 | Loss: 0.3112 | LM: 0.3055 | LB: 1.1383 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:28:02] Epoch 1 | Step 1600 | Loss: 0.3111 | LM: 0.3052 | LB: 1.1382 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.459/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:28:10] Epoch 1 | Step 1610 | Loss: 0.3107 | LM: 0.3047 | LB: 1.1382 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:28:17] Epoch 1 | Step 1620 | Loss: 0.3104 | LM: 0.3044 | LB: 1.1380 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.435 | LR: 1.00e-04 +[2026-04-25 20:28:24] Epoch 1 | Step 1630 | Loss: 0.3104 | LM: 0.3043 | LB: 1.1379 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:28:31] Epoch 1 | Step 1640 | Loss: 0.3102 | LM: 0.3043 | LB: 1.1376 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:28:38] Epoch 1 | Step 1650 | Loss: 0.3100 | LM: 0.3044 | LB: 1.1376 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:28:45] Epoch 1 | Step 1660 | Loss: 0.3097 | LM: 0.3036 | LB: 1.1375 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:28:52] Epoch 1 | Step 1670 | Loss: 0.3098 | LM: 0.3036 | LB: 1.1374 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:28:59] Epoch 1 | Step 1680 | Loss: 0.3099 | LM: 0.3033 | LB: 1.1374 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:29:06] Epoch 1 | Step 1690 | Loss: 0.3097 | LM: 0.3031 | LB: 1.1373 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:29:14] Epoch 1 | Step 1700 | Loss: 0.3093 | LM: 0.3025 | LB: 1.1372 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:29:21] Epoch 1 | Step 1710 | Loss: 0.3092 | LM: 0.3018 | LB: 1.1371 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.434 | LR: 1.00e-04 +[2026-04-25 20:29:28] Epoch 1 | Step 1720 | Loss: 0.3091 | LM: 0.3019 | LB: 1.1369 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:29:35] Epoch 1 | Step 1730 | Loss: 0.3089 | LM: 0.3019 | LB: 1.1368 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:29:42] Epoch 1 | Step 1740 | Loss: 0.3089 | LM: 0.3021 | LB: 1.1367 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:29:49] Epoch 1 | Step 1750 | Loss: 0.3091 | LM: 0.3023 | LB: 1.1366 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:29:56] Epoch 1 | Step 1760 | Loss: 0.3090 | LM: 0.3022 | LB: 1.1364 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:30:03] Epoch 1 | Step 1770 | Loss: 0.3090 | LM: 0.3023 | LB: 1.1363 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:30:11] Epoch 1 | Step 1780 | Loss: 0.3089 | LM: 0.3021 | LB: 1.1362 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:30:18] Epoch 1 | Step 1790 | Loss: 0.3090 | LM: 0.3018 | LB: 1.1361 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.433 | LR: 1.00e-04 +[2026-04-25 20:30:25] Epoch 1 | Step 1800 | Loss: 0.3089 | LM: 0.3017 | LB: 1.1360 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.457/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:30:32] Epoch 1 | Step 1810 | Loss: 0.3088 | LM: 0.3017 | LB: 1.1359 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:30:39] Epoch 1 | Step 1820 | Loss: 0.3089 | LM: 0.3016 | LB: 1.1358 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:30:46] Epoch 1 | Step 1830 | Loss: 0.3089 | LM: 0.3023 | LB: 1.1357 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:30:53] Epoch 1 | Step 1840 | Loss: 0.3088 | LM: 0.3024 | LB: 1.1356 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:00] Epoch 1 | Step 1850 | Loss: 0.3086 | LM: 0.3022 | LB: 1.1355 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:07] Epoch 1 | Step 1860 | Loss: 0.3087 | LM: 0.3018 | LB: 1.1354 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:14] Epoch 1 | Step 1870 | Loss: 0.3085 | LM: 0.3014 | LB: 1.1354 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:22] Epoch 1 | Step 1880 | Loss: 0.3084 | LM: 0.3015 | LB: 1.1352 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:29] Epoch 1 | Step 1890 | Loss: 0.3084 | LM: 0.3012 | LB: 1.1350 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.456/SR1: 0.432 | LR: 1.00e-04 +[2026-04-25 20:31:36] Epoch 1 | Step 1900 | Loss: 0.3083 | LM: 0.3013 | LB: 1.1348 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:31:43] Epoch 1 | Step 1910 | Loss: 0.3083 | LM: 0.3013 | LB: 1.1348 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:31:50] Epoch 1 | Step 1920 | Loss: 0.3083 | LM: 0.3010 | LB: 1.1347 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:31:57] Epoch 1 | Step 1930 | Loss: 0.3082 | LM: 0.3009 | LB: 1.1346 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:04] Epoch 1 | Step 1940 | Loss: 0.3081 | LM: 0.3010 | LB: 1.1346 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:11] Epoch 1 | Step 1950 | Loss: 0.3080 | LM: 0.3010 | LB: 1.1344 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:18] Epoch 1 | Step 1960 | Loss: 0.3080 | LM: 0.3012 | LB: 1.1343 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:25] Epoch 1 | Step 1970 | Loss: 0.3081 | LM: 0.3015 | LB: 1.1342 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:32] Epoch 1 | Step 1980 | Loss: 0.3080 | LM: 0.3014 | LB: 1.1342 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:39] Epoch 1 | Step 1990 | Loss: 0.3080 | LM: 0.3015 | LB: 1.1341 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:46] Epoch 1 | Step 2000 | Loss: 0.3080 | LM: 0.3013 | LB: 1.1340 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.455/SR1: 0.431 | LR: 1.00e-04 +[2026-04-25 20:32:47] Validation | Batch 10/42 | Loss: 0.2826 | LM_LOSS: 0.2715 | LB_LOSS: 1.1078 +[2026-04-25 20:32:49] Validation | Batch 20/42 | Loss: 0.3051 | LM_LOSS: 0.2940 | LB_LOSS: 1.1106 +[2026-04-25 20:32:50] Validation | Batch 30/42 | Loss: 0.2964 | LM_LOSS: 0.2853 | LB_LOSS: 1.1074 +[2026-04-25 20:32:51] Validation | Batch 40/42 | Loss: 0.3008 | LM_LOSS: 0.2898 | LB_LOSS: 1.1064 +[2026-04-25 20:32:51] Validation | Batch 42/42 | Loss: 0.3003 | LM_LOSS: 0.2892 | LB_LOSS: 1.1066 +[2026-04-25 20:32:51] Validation | Loss: 0.3003 | LM_LOSS: 0.2892 | LB_LOSS: 1.1066 | PPL: 1.33 | Time: 4.80s +[2026-04-25 20:32:54] New best model saved! Val loss: 0.3003 +[2026-04-25 20:33:01] Epoch 1 | Step 2010 | Loss: 0.3081 | LM: 0.3015 | LB: 1.1340 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.455/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:08] Epoch 1 | Step 2020 | Loss: 0.3079 | LM: 0.3015 | LB: 1.1338 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:15] Epoch 1 | Step 2030 | Loss: 0.3079 | LM: 0.3012 | LB: 1.1337 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:22] Epoch 1 | Step 2040 | Loss: 0.3078 | LM: 0.3009 | LB: 1.1336 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:29] Epoch 1 | Step 2050 | Loss: 0.3077 | LM: 0.3008 | LB: 1.1334 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:36] Epoch 1 | Step 2060 | Loss: 0.3077 | LM: 0.3010 | LB: 1.1334 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:43] Epoch 1 | Step 2070 | Loss: 0.3073 | LM: 0.3005 | LB: 1.1332 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:51] Epoch 1 | Step 2080 | Loss: 0.3071 | LM: 0.3003 | LB: 1.1331 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.430 | LR: 1.00e-04 +[2026-04-25 20:33:58] Epoch 1 | Step 2090 | Loss: 0.3071 | LM: 0.3005 | LB: 1.1329 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:05] Epoch 1 | Step 2100 | Loss: 0.3070 | LM: 0.3001 | LB: 1.1328 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:12] Epoch 1 | Step 2110 | Loss: 0.3070 | LM: 0.3000 | LB: 1.1326 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:19] Epoch 1 | Step 2120 | Loss: 0.3069 | LM: 0.3002 | LB: 1.1326 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:26] Epoch 1 | Step 2130 | Loss: 0.3068 | LM: 0.3004 | LB: 1.1325 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:33] Epoch 1 | Step 2140 | Loss: 0.3066 | LM: 0.3007 | LB: 1.1324 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:40] Epoch 1 | Step 2150 | Loss: 0.3066 | LM: 0.3006 | LB: 1.1324 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:47] Epoch 1 | Step 2160 | Loss: 0.3065 | LM: 0.3005 | LB: 1.1323 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:34:54] Epoch 1 | Step 2170 | Loss: 0.3065 | LM: 0.3003 | LB: 1.1322 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:35:01] Epoch 1 | Step 2180 | Loss: 0.3063 | LM: 0.3002 | LB: 1.1321 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:35:08] Epoch 1 | Step 2190 | Loss: 0.3062 | LM: 0.2999 | LB: 1.1320 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.453/SR1: 0.429 | LR: 1.00e-04 +[2026-04-25 20:35:15] Epoch 1 | Step 2200 | Loss: 0.3062 | LM: 0.3000 | LB: 1.1319 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.453/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:22] Epoch 1 | Step 2210 | Loss: 0.3062 | LM: 0.3001 | LB: 1.1318 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:30] Epoch 1 | Step 2220 | Loss: 0.3063 | LM: 0.3002 | LB: 1.1317 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:37] Epoch 1 | Step 2230 | Loss: 0.3064 | LM: 0.3002 | LB: 1.1316 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:44] Epoch 1 | Step 2240 | Loss: 0.3066 | LM: 0.2999 | LB: 1.1315 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:51] Epoch 1 | Step 2250 | Loss: 0.3066 | LM: 0.3000 | LB: 1.1314 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:35:58] Epoch 1 | Step 2260 | Loss: 0.3065 | LM: 0.2996 | LB: 1.1313 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:36:05] Epoch 1 | Step 2270 | Loss: 0.3065 | LM: 0.3000 | LB: 1.1312 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:36:12] Epoch 1 | Step 2280 | Loss: 0.3064 | LM: 0.2998 | LB: 1.1311 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:36:19] Epoch 1 | Step 2290 | Loss: 0.3067 | LM: 0.3000 | LB: 1.1309 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.452/SR1: 0.428 | LR: 1.00e-04 +[2026-04-25 20:36:26] Epoch 1 | Step 2300 | Loss: 0.3066 | LM: 0.2996 | LB: 1.1308 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:36:33] Epoch 1 | Step 2310 | Loss: 0.3065 | LM: 0.2995 | LB: 1.1307 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:36:40] Epoch 1 | Step 2320 | Loss: 0.3065 | LM: 0.2997 | LB: 1.1306 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:36:47] Epoch 1 | Step 2330 | Loss: 0.3065 | LM: 0.2997 | LB: 1.1305 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:36:55] Epoch 1 | Step 2340 | Loss: 0.3064 | LM: 0.2999 | LB: 1.1305 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:02] Epoch 1 | Step 2350 | Loss: 0.3063 | LM: 0.3001 | LB: 1.1304 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:09] Epoch 1 | Step 2360 | Loss: 0.3064 | LM: 0.3002 | LB: 1.1303 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:16] Epoch 1 | Step 2370 | Loss: 0.3063 | LM: 0.3000 | LB: 1.1302 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:23] Epoch 1 | Step 2380 | Loss: 0.3062 | LM: 0.2998 | LB: 1.1301 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:30] Epoch 1 | Step 2390 | Loss: 0.3062 | LM: 0.2997 | LB: 1.1301 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:37] Epoch 1 | Step 2400 | Loss: 0.3061 | LM: 0.2999 | LB: 1.1300 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:44] Epoch 1 | Step 2410 | Loss: 0.3061 | LM: 0.3001 | LB: 1.1299 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.427 | LR: 1.00e-04 +[2026-04-25 20:37:51] Epoch 1 | Step 2420 | Loss: 0.3062 | LM: 0.2999 | LB: 1.1297 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:37:58] Epoch 1 | Step 2430 | Loss: 0.3062 | LM: 0.3000 | LB: 1.1297 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:05] Epoch 1 | Step 2440 | Loss: 0.3061 | LM: 0.2999 | LB: 1.1296 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:12] Epoch 1 | Step 2450 | Loss: 0.3061 | LM: 0.2998 | LB: 1.1295 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:19] Epoch 1 | Step 2460 | Loss: 0.3061 | LM: 0.2998 | LB: 1.1294 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:26] Epoch 1 | Step 2470 | Loss: 0.3061 | LM: 0.2996 | LB: 1.1293 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:34] Epoch 1 | Step 2480 | Loss: 0.3060 | LM: 0.2995 | LB: 1.1292 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:41] Epoch 1 | Step 2490 | Loss: 0.3059 | LM: 0.2996 | LB: 1.1291 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:48] Epoch 1 | Step 2500 | Loss: 0.3058 | LM: 0.2997 | LB: 1.1290 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.355 | HR1: 0.450/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:38:55] Epoch 1 | Step 2510 | Loss: 0.3057 | LM: 0.2999 | LB: 1.1290 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:39:02] Epoch 1 | Step 2520 | Loss: 0.3055 | LM: 0.2995 | LB: 1.1289 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:39:09] Epoch 1 | Step 2530 | Loss: 0.3053 | LM: 0.2994 | LB: 1.1289 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:39:16] Epoch 1 | Step 2540 | Loss: 0.3053 | LM: 0.2993 | LB: 1.1288 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.426 | LR: 1.00e-04 +[2026-04-25 20:39:23] Epoch 1 | Step 2550 | Loss: 0.3051 | LM: 0.2987 | LB: 1.1288 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:39:30] Epoch 1 | Step 2560 | Loss: 0.3050 | LM: 0.2989 | LB: 1.1288 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:39:38] Epoch 1 | Step 2570 | Loss: 0.3051 | LM: 0.2990 | LB: 1.1288 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:39:45] Epoch 1 | Step 2580 | Loss: 0.3051 | LM: 0.2991 | LB: 1.1287 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:39:52] Epoch 1 | Step 2590 | Loss: 0.3051 | LM: 0.2989 | LB: 1.1286 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:39:59] Epoch 1 | Step 2600 | Loss: 0.3052 | LM: 0.2991 | LB: 1.1285 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:06] Epoch 1 | Step 2610 | Loss: 0.3052 | LM: 0.2987 | LB: 1.1285 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:13] Epoch 1 | Step 2620 | Loss: 0.3051 | LM: 0.2986 | LB: 1.1284 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:20] Epoch 1 | Step 2630 | Loss: 0.3050 | LM: 0.2985 | LB: 1.1284 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:28] Epoch 1 | Step 2640 | Loss: 0.3050 | LM: 0.2983 | LB: 1.1283 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:35] Epoch 1 | Step 2650 | Loss: 0.3048 | LM: 0.2981 | LB: 1.1283 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:42] Epoch 1 | Step 2660 | Loss: 0.3048 | LM: 0.2982 | LB: 1.1282 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:49] Epoch 1 | Step 2670 | Loss: 0.3047 | LM: 0.2985 | LB: 1.1282 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:40:56] Epoch 1 | Step 2680 | Loss: 0.3046 | LM: 0.2985 | LB: 1.1281 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:41:03] Epoch 1 | Step 2690 | Loss: 0.3046 | LM: 0.2986 | LB: 1.1281 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:41:10] Epoch 1 | Step 2700 | Loss: 0.3045 | LM: 0.2986 | LB: 1.1280 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.425 | LR: 1.00e-04 +[2026-04-25 20:41:18] Epoch 1 | Step 2710 | Loss: 0.3043 | LM: 0.2983 | LB: 1.1279 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:41:25] Epoch 1 | Step 2720 | Loss: 0.3043 | LM: 0.2982 | LB: 1.1278 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:41:32] Epoch 1 | Step 2730 | Loss: 0.3042 | LM: 0.2982 | LB: 1.1277 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:41:39] Epoch 1 | Step 2740 | Loss: 0.3043 | LM: 0.2982 | LB: 1.1276 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:41:46] Epoch 1 | Step 2750 | Loss: 0.3043 | LM: 0.2981 | LB: 1.1275 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:41:53] Epoch 1 | Step 2760 | Loss: 0.3041 | LM: 0.2977 | LB: 1.1274 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:00] Epoch 1 | Step 2770 | Loss: 0.3041 | LM: 0.2977 | LB: 1.1273 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:07] Epoch 1 | Step 2780 | Loss: 0.3041 | LM: 0.2975 | LB: 1.1272 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:14] Epoch 1 | Step 2790 | Loss: 0.3040 | LM: 0.2976 | LB: 1.1272 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:21] Epoch 1 | Step 2800 | Loss: 0.3039 | LM: 0.2976 | LB: 1.1271 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:29] Epoch 1 | Step 2810 | Loss: 0.3040 | LM: 0.2975 | LB: 1.1270 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:36] Epoch 1 | Step 2820 | Loss: 0.3040 | LM: 0.2976 | LB: 1.1269 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.424 | LR: 1.00e-04 +[2026-04-25 20:42:43] Epoch 1 | Step 2830 | Loss: 0.3039 | LM: 0.2976 | LB: 1.1269 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:42:50] Epoch 1 | Step 2840 | Loss: 0.3040 | LM: 0.2977 | LB: 1.1269 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:42:57] Epoch 1 | Step 2850 | Loss: 0.3041 | LM: 0.2981 | LB: 1.1268 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:04] Epoch 1 | Step 2860 | Loss: 0.3040 | LM: 0.2981 | LB: 1.1268 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:12] Epoch 1 | Step 2870 | Loss: 0.3039 | LM: 0.2981 | LB: 1.1268 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:19] Epoch 1 | Step 2880 | Loss: 0.3039 | LM: 0.2979 | LB: 1.1267 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:26] Epoch 1 | Step 2890 | Loss: 0.3038 | LM: 0.2977 | LB: 1.1265 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:33] Epoch 1 | Step 2900 | Loss: 0.3037 | LM: 0.2975 | LB: 1.1264 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:40] Epoch 1 | Step 2910 | Loss: 0.3037 | LM: 0.2974 | LB: 1.1263 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:47] Epoch 1 | Step 2920 | Loss: 0.3038 | LM: 0.2977 | LB: 1.1262 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:43:54] Epoch 1 | Step 2930 | Loss: 0.3037 | LM: 0.2975 | LB: 1.1262 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:44:01] Epoch 1 | Step 2940 | Loss: 0.3035 | LM: 0.2973 | LB: 1.1260 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.423 | LR: 1.00e-04 +[2026-04-25 20:44:08] Epoch 1 | Step 2950 | Loss: 0.3036 | LM: 0.2974 | LB: 1.1259 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:15] Epoch 1 | Step 2960 | Loss: 0.3036 | LM: 0.2975 | LB: 1.1259 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:23] Epoch 1 | Step 2970 | Loss: 0.3036 | LM: 0.2977 | LB: 1.1258 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:30] Epoch 1 | Step 2980 | Loss: 0.3035 | LM: 0.2976 | LB: 1.1257 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:37] Epoch 1 | Step 2990 | Loss: 0.3036 | LM: 0.2976 | LB: 1.1256 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:44] Epoch 1 | Step 3000 | Loss: 0.3035 | LM: 0.2976 | LB: 1.1256 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:51] Epoch 1 | Step 3010 | Loss: 0.3035 | LM: 0.2976 | LB: 1.1255 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:44:59] Epoch 1 | Step 3020 | Loss: 0.3033 | LM: 0.2974 | LB: 1.1254 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:06] Epoch 1 | Step 3030 | Loss: 0.3033 | LM: 0.2974 | LB: 1.1254 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:13] Epoch 1 | Step 3040 | Loss: 0.3031 | LM: 0.2972 | LB: 1.1253 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:20] Epoch 1 | Step 3050 | Loss: 0.3030 | LM: 0.2969 | LB: 1.1252 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:27] Epoch 1 | Step 3060 | Loss: 0.3030 | LM: 0.2973 | LB: 1.1252 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:34] Epoch 1 | Step 3070 | Loss: 0.3029 | LM: 0.2972 | LB: 1.1251 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:41] Epoch 1 | Step 3080 | Loss: 0.3028 | LM: 0.2969 | LB: 1.1250 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.446/SR1: 0.422 | LR: 1.00e-04 +[2026-04-25 20:45:48] Epoch 1 | Step 3090 | Loss: 0.3027 | LM: 0.2966 | LB: 1.1249 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:45:55] Epoch 1 | Step 3100 | Loss: 0.3026 | LM: 0.2966 | LB: 1.1248 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:02] Epoch 1 | Step 3110 | Loss: 0.3025 | LM: 0.2965 | LB: 1.1248 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:10] Epoch 1 | Step 3120 | Loss: 0.3027 | LM: 0.2965 | LB: 1.1247 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:17] Epoch 1 | Step 3130 | Loss: 0.3027 | LM: 0.2964 | LB: 1.1247 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:24] Epoch 1 | Step 3140 | Loss: 0.3027 | LM: 0.2963 | LB: 1.1247 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:31] Epoch 1 | Step 3150 | Loss: 0.3028 | LM: 0.2965 | LB: 1.1246 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:38] Epoch 1 | Step 3160 | Loss: 0.3028 | LM: 0.2964 | LB: 1.1245 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:45] Epoch 1 | Step 3170 | Loss: 0.3028 | LM: 0.2965 | LB: 1.1244 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:52] Epoch 1 | Step 3180 | Loss: 0.3028 | LM: 0.2966 | LB: 1.1244 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:46:59] Epoch 1 | Step 3190 | Loss: 0.3026 | LM: 0.2964 | LB: 1.1243 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:47:06] Epoch 1 | Step 3200 | Loss: 0.3025 | LM: 0.2961 | LB: 1.1242 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:47:13] Epoch 1 | Step 3210 | Loss: 0.3025 | LM: 0.2958 | LB: 1.1241 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:47:21] Epoch 1 | Step 3220 | Loss: 0.3023 | LM: 0.2957 | LB: 1.1241 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:47:28] Epoch 1 | Step 3230 | Loss: 0.3023 | LM: 0.2956 | LB: 1.1240 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.421 | LR: 1.00e-04 +[2026-04-25 20:47:35] Epoch 1 | Step 3240 | Loss: 0.3022 | LM: 0.2954 | LB: 1.1240 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:47:42] Epoch 1 | Step 3250 | Loss: 0.3022 | LM: 0.2955 | LB: 1.1239 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:47:49] Epoch 1 | Step 3260 | Loss: 0.3022 | LM: 0.2952 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:47:56] Epoch 1 | Step 3270 | Loss: 0.3021 | LM: 0.2953 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:03] Epoch 1 | Step 3280 | Loss: 0.3020 | LM: 0.2950 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:10] Epoch 1 | Step 3290 | Loss: 0.3019 | LM: 0.2950 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:17] Epoch 1 | Step 3300 | Loss: 0.3019 | LM: 0.2952 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:25] Epoch 1 | Step 3310 | Loss: 0.3018 | LM: 0.2951 | LB: 1.1237 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:32] Epoch 1 | Step 3320 | Loss: 0.3018 | LM: 0.2950 | LB: 1.1236 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:39] Epoch 1 | Step 3330 | Loss: 0.3018 | LM: 0.2950 | LB: 1.1236 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:46] Epoch 1 | Step 3340 | Loss: 0.3018 | LM: 0.2952 | LB: 1.1235 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:48:53] Epoch 1 | Step 3350 | Loss: 0.3016 | LM: 0.2951 | LB: 1.1235 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:00] Epoch 1 | Step 3360 | Loss: 0.3015 | LM: 0.2949 | LB: 1.1234 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:07] Epoch 1 | Step 3370 | Loss: 0.3016 | LM: 0.2948 | LB: 1.1233 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:14] Epoch 1 | Step 3380 | Loss: 0.3014 | LM: 0.2947 | LB: 1.1233 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:21] Epoch 1 | Step 3390 | Loss: 0.3015 | LM: 0.2947 | LB: 1.1233 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:29] Epoch 1 | Step 3400 | Loss: 0.3017 | LM: 0.2948 | LB: 1.1232 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.420 | LR: 1.00e-04 +[2026-04-25 20:49:36] Epoch 1 | Step 3410 | Loss: 0.3017 | LM: 0.2949 | LB: 1.1231 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.444/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:49:43] Epoch 1 | Step 3420 | Loss: 0.3016 | LM: 0.2950 | LB: 1.1231 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:49:50] Epoch 1 | Step 3430 | Loss: 0.3015 | LM: 0.2952 | LB: 1.1230 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:49:57] Epoch 1 | Step 3440 | Loss: 0.3015 | LM: 0.2953 | LB: 1.1229 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:04] Epoch 1 | Step 3450 | Loss: 0.3015 | LM: 0.2953 | LB: 1.1228 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:11] Epoch 1 | Step 3460 | Loss: 0.3014 | LM: 0.2950 | LB: 1.1227 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:19] Epoch 1 | Step 3470 | Loss: 0.3014 | LM: 0.2949 | LB: 1.1227 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:26] Epoch 1 | Step 3480 | Loss: 0.3014 | LM: 0.2950 | LB: 1.1226 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:33] Epoch 1 | Step 3490 | Loss: 0.3013 | LM: 0.2950 | LB: 1.1226 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:40] Epoch 1 | Step 3500 | Loss: 0.3012 | LM: 0.2948 | LB: 1.1225 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:47] Epoch 1 | Step 3510 | Loss: 0.3012 | LM: 0.2948 | LB: 1.1224 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:50:54] Epoch 1 | Step 3520 | Loss: 0.3011 | LM: 0.2949 | LB: 1.1224 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:51:01] Epoch 1 | Step 3530 | Loss: 0.3012 | LM: 0.2947 | LB: 1.1223 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:51:08] Epoch 1 | Step 3540 | Loss: 0.3011 | LM: 0.2945 | LB: 1.1222 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.419 | LR: 1.00e-04 +[2026-04-25 20:51:15] Epoch 1 | Step 3550 | Loss: 0.3011 | LM: 0.2945 | LB: 1.1222 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:23] Epoch 1 | Step 3560 | Loss: 0.3011 | LM: 0.2946 | LB: 1.1221 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:30] Epoch 1 | Step 3570 | Loss: 0.3011 | LM: 0.2945 | LB: 1.1220 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:37] Epoch 1 | Step 3580 | Loss: 0.3010 | LM: 0.2945 | LB: 1.1219 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:44] Epoch 1 | Step 3590 | Loss: 0.3010 | LM: 0.2945 | LB: 1.1219 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:51] Epoch 1 | Step 3600 | Loss: 0.3009 | LM: 0.2945 | LB: 1.1218 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:51:58] Epoch 1 | Step 3610 | Loss: 0.3008 | LM: 0.2943 | LB: 1.1217 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:05] Epoch 1 | Step 3620 | Loss: 0.3007 | LM: 0.2940 | LB: 1.1217 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:12] Epoch 1 | Step 3630 | Loss: 0.3008 | LM: 0.2942 | LB: 1.1216 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:19] Epoch 1 | Step 3640 | Loss: 0.3009 | LM: 0.2943 | LB: 1.1215 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:26] Epoch 1 | Step 3650 | Loss: 0.3009 | LM: 0.2945 | LB: 1.1215 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:34] Epoch 1 | Step 3660 | Loss: 0.3008 | LM: 0.2945 | LB: 1.1215 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:41] Epoch 1 | Step 3670 | Loss: 0.3008 | LM: 0.2942 | LB: 1.1214 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:48] Epoch 1 | Step 3680 | Loss: 0.3007 | LM: 0.2943 | LB: 1.1213 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.418 | LR: 1.00e-04 +[2026-04-25 20:52:55] Epoch 1 | Step 3690 | Loss: 0.3008 | LM: 0.2944 | LB: 1.1212 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:02] Epoch 1 | Step 3700 | Loss: 0.3007 | LM: 0.2943 | LB: 1.1211 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:09] Epoch 1 | Step 3710 | Loss: 0.3006 | LM: 0.2941 | LB: 1.1211 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:16] Epoch 1 | Step 3720 | Loss: 0.3006 | LM: 0.2943 | LB: 1.1211 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:23] Epoch 1 | Step 3730 | Loss: 0.3006 | LM: 0.2942 | LB: 1.1210 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:31] Epoch 1 | Step 3740 | Loss: 0.3006 | LM: 0.2941 | LB: 1.1209 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:38] Epoch 1 | Step 3750 | Loss: 0.3005 | LM: 0.2939 | LB: 1.1208 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:45] Epoch 1 | Step 3760 | Loss: 0.3006 | LM: 0.2940 | LB: 1.1208 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:52] Epoch 1 | Step 3770 | Loss: 0.3006 | LM: 0.2941 | LB: 1.1207 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:53:59] Epoch 1 | Step 3780 | Loss: 0.3006 | LM: 0.2940 | LB: 1.1207 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:06] Epoch 1 | Step 3790 | Loss: 0.3006 | LM: 0.2940 | LB: 1.1206 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:13] Epoch 1 | Step 3800 | Loss: 0.3006 | LM: 0.2941 | LB: 1.1206 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:20] Epoch 1 | Step 3810 | Loss: 0.3004 | LM: 0.2939 | LB: 1.1205 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:27] Epoch 1 | Step 3820 | Loss: 0.3004 | LM: 0.2937 | LB: 1.1204 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:34] Epoch 1 | Step 3830 | Loss: 0.3004 | LM: 0.2938 | LB: 1.1204 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:41] Epoch 1 | Step 3840 | Loss: 0.3004 | LM: 0.2939 | LB: 1.1203 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.417 | LR: 1.00e-04 +[2026-04-25 20:54:48] Epoch 1 | Step 3850 | Loss: 0.3003 | LM: 0.2938 | LB: 1.1203 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:54:55] Epoch 1 | Step 3860 | Loss: 0.3003 | LM: 0.2938 | LB: 1.1202 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:02] Epoch 1 | Step 3870 | Loss: 0.3003 | LM: 0.2936 | LB: 1.1202 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.441/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:09] Epoch 1 | Step 3880 | Loss: 0.3001 | LM: 0.2933 | LB: 1.1201 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.441/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:17] Epoch 1 | Step 3890 | Loss: 0.3000 | LM: 0.2934 | LB: 1.1201 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:24] Epoch 1 | Step 3900 | Loss: 0.3000 | LM: 0.2933 | LB: 1.1200 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:31] Epoch 1 | Step 3910 | Loss: 0.3000 | LM: 0.2934 | LB: 1.1200 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:38] Epoch 1 | Step 3920 | Loss: 0.3001 | LM: 0.2933 | LB: 1.1200 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:45] Epoch 1 | Step 3930 | Loss: 0.3000 | LM: 0.2933 | LB: 1.1199 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:52] Epoch 1 | Step 3940 | Loss: 0.3000 | LM: 0.2932 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:55:59] Epoch 1 | Step 3950 | Loss: 0.2999 | LM: 0.2929 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:56:06] Epoch 1 | Step 3960 | Loss: 0.2999 | LM: 0.2929 | LB: 1.1197 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 1.00e-04 +[2026-04-25 20:56:13] Epoch 1 | Step 3970 | Loss: 0.2998 | LM: 0.2929 | LB: 1.1197 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 9.99e-05 +[2026-04-25 20:56:20] Epoch 1 | Step 3980 | Loss: 0.2998 | LM: 0.2929 | LB: 1.1196 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 9.99e-05 +[2026-04-25 20:56:27] Epoch 1 | Step 3990 | Loss: 0.2998 | LM: 0.2931 | LB: 1.1195 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 9.97e-05 +[2026-04-25 20:56:34] Epoch 1 | Step 4000 | Loss: 0.2998 | LM: 0.2931 | LB: 1.1195 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 9.95e-05 +[2026-04-25 20:56:35] Validation | Batch 10/42 | Loss: 0.2820 | LM_LOSS: 0.2711 | LB_LOSS: 1.0925 +[2026-04-25 20:56:36] Validation | Batch 20/42 | Loss: 0.3017 | LM_LOSS: 0.2907 | LB_LOSS: 1.0959 +[2026-04-25 20:56:38] Validation | Batch 30/42 | Loss: 0.2930 | LM_LOSS: 0.2820 | LB_LOSS: 1.0932 +[2026-04-25 20:56:39] Validation | Batch 40/42 | Loss: 0.2970 | LM_LOSS: 0.2861 | LB_LOSS: 1.0924 +[2026-04-25 20:56:39] Validation | Batch 42/42 | Loss: 0.2964 | LM_LOSS: 0.2855 | LB_LOSS: 1.0926 +[2026-04-25 20:56:39] Validation | Loss: 0.2964 | LM_LOSS: 0.2855 | LB_LOSS: 1.0926 | PPL: 1.33 | Time: 4.75s +[2026-04-25 20:56:42] New best model saved! Val loss: 0.2964 +[2026-04-25 20:56:49] Epoch 1 | Step 4010 | Loss: 0.2997 | LM: 0.2931 | LB: 1.1194 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.416 | LR: 9.93e-05 +[2026-04-25 20:56:56] Epoch 1 | Step 4020 | Loss: 0.2997 | LM: 0.2931 | LB: 1.1193 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.90e-05 +[2026-04-25 20:57:03] Epoch 1 | Step 4030 | Loss: 0.2996 | LM: 0.2930 | LB: 1.1193 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.87e-05 +[2026-04-25 20:57:10] Epoch 1 | Step 4040 | Loss: 0.2995 | LM: 0.2928 | LB: 1.1192 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.84e-05 +[2026-04-25 20:57:18] Epoch 1 | Step 4050 | Loss: 0.2994 | LM: 0.2928 | LB: 1.1191 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.80e-05 +[2026-04-25 20:57:25] Epoch 1 | Step 4060 | Loss: 0.2992 | LM: 0.2926 | LB: 1.1191 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.75e-05 +[2026-04-25 20:57:32] Epoch 1 | Step 4070 | Loss: 0.2992 | LM: 0.2927 | LB: 1.1190 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.415 | LR: 9.70e-05 +[2026-04-25 20:57:39] Epoch 1 | Step 4080 | Loss: 0.2992 | LM: 0.2928 | LB: 1.1190 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.65e-05 +[2026-04-25 20:57:46] Epoch 1 | Step 4090 | Loss: 0.2992 | LM: 0.2927 | LB: 1.1189 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.59e-05 +[2026-04-25 20:57:53] Epoch 1 | Step 4100 | Loss: 0.2993 | LM: 0.2927 | LB: 1.1188 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.53e-05 +[2026-04-25 20:58:00] Epoch 1 | Step 4110 | Loss: 0.2992 | LM: 0.2928 | LB: 1.1188 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.46e-05 +[2026-04-25 20:58:07] Epoch 1 | Step 4120 | Loss: 0.2993 | LM: 0.2928 | LB: 1.1187 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.40e-05 +[2026-04-25 20:58:14] Epoch 1 | Step 4130 | Loss: 0.2992 | LM: 0.2926 | LB: 1.1187 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.32e-05 +[2026-04-25 20:58:21] Epoch 1 | Step 4140 | Loss: 0.2992 | LM: 0.2927 | LB: 1.1186 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.24e-05 +[2026-04-25 20:58:28] Epoch 1 | Step 4150 | Loss: 0.2994 | LM: 0.2929 | LB: 1.1186 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.16e-05 +[2026-04-25 20:58:35] Epoch 1 | Step 4160 | Loss: 0.2994 | LM: 0.2930 | LB: 1.1185 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 9.08e-05 +[2026-04-25 20:58:42] Epoch 1 | Step 4170 | Loss: 0.2994 | LM: 0.2932 | LB: 1.1186 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 8.99e-05 +[2026-04-25 20:58:49] Epoch 1 | Step 4180 | Loss: 0.2994 | LM: 0.2933 | LB: 1.1185 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 8.90e-05 +[2026-04-25 20:58:56] Epoch 1 | Step 4190 | Loss: 0.2993 | LM: 0.2933 | LB: 1.1184 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.415 | LR: 8.80e-05 +[2026-04-25 20:59:03] Epoch 1 | Step 4200 | Loss: 0.2994 | LM: 0.2934 | LB: 1.1184 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.70e-05 +[2026-04-25 20:59:10] Epoch 1 | Step 4210 | Loss: 0.2994 | LM: 0.2933 | LB: 1.1183 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.60e-05 +[2026-04-25 20:59:18] Epoch 1 | Step 4220 | Loss: 0.2996 | LM: 0.2935 | LB: 1.1183 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.50e-05 +[2026-04-25 20:59:25] Epoch 1 | Step 4230 | Loss: 0.2996 | LM: 0.2935 | LB: 1.1183 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.39e-05 +[2026-04-25 20:59:32] Epoch 1 | Step 4240 | Loss: 0.2997 | LM: 0.2937 | LB: 1.1182 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.28e-05 +[2026-04-25 20:59:39] Epoch 1 | Step 4250 | Loss: 0.2997 | LM: 0.2937 | LB: 1.1182 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.16e-05 +[2026-04-25 20:59:46] Epoch 1 | Step 4260 | Loss: 0.2996 | LM: 0.2934 | LB: 1.1182 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 8.05e-05 +[2026-04-25 20:59:53] Epoch 1 | Step 4270 | Loss: 0.2997 | LM: 0.2934 | LB: 1.1181 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.93e-05 +[2026-04-25 21:00:00] Epoch 1 | Step 4280 | Loss: 0.2996 | LM: 0.2933 | LB: 1.1181 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.81e-05 +[2026-04-25 21:00:07] Epoch 1 | Step 4290 | Loss: 0.2995 | LM: 0.2931 | LB: 1.1181 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.68e-05 +[2026-04-25 21:00:14] Epoch 1 | Step 4300 | Loss: 0.2995 | LM: 0.2931 | LB: 1.1180 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.56e-05 +[2026-04-25 21:00:21] Epoch 1 | Step 4310 | Loss: 0.2996 | LM: 0.2930 | LB: 1.1180 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.43e-05 +[2026-04-25 21:00:28] Epoch 1 | Step 4320 | Loss: 0.2996 | LM: 0.2929 | LB: 1.1179 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.439/SR1: 0.414 | LR: 7.30e-05 +[2026-04-25 21:00:35] Epoch 1 | Step 4330 | Loss: 0.2996 | LM: 0.2930 | LB: 1.1179 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 7.16e-05 +[2026-04-25 21:00:42] Epoch 1 | Step 4340 | Loss: 0.2995 | LM: 0.2931 | LB: 1.1179 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 7.03e-05 +[2026-04-25 21:00:50] Epoch 1 | Step 4350 | Loss: 0.2994 | LM: 0.2931 | LB: 1.1178 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 6.90e-05 +[2026-04-25 21:00:57] Epoch 1 | Step 4360 | Loss: 0.2994 | LM: 0.2930 | LB: 1.1178 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 6.76e-05 +[2026-04-25 21:01:04] Epoch 1 | Step 4370 | Loss: 0.2994 | LM: 0.2932 | LB: 1.1177 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 6.62e-05 +[2026-04-25 21:01:11] Epoch 1 | Step 4380 | Loss: 0.2993 | LM: 0.2931 | LB: 1.1176 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.414 | LR: 6.48e-05 +[2026-04-25 21:01:18] Epoch 1 | Step 4390 | Loss: 0.2993 | LM: 0.2930 | LB: 1.1176 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 6.34e-05 +[2026-04-25 21:01:25] Epoch 1 | Step 4400 | Loss: 0.2993 | LM: 0.2928 | LB: 1.1175 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 6.20e-05 +[2026-04-25 21:01:32] Epoch 1 | Step 4410 | Loss: 0.2992 | LM: 0.2927 | LB: 1.1175 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 6.06e-05 +[2026-04-25 21:01:39] Epoch 1 | Step 4420 | Loss: 0.2992 | LM: 0.2928 | LB: 1.1174 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.92e-05 +[2026-04-25 21:01:46] Epoch 1 | Step 4430 | Loss: 0.2992 | LM: 0.2928 | LB: 1.1174 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.78e-05 +[2026-04-25 21:01:53] Epoch 1 | Step 4440 | Loss: 0.2993 | LM: 0.2928 | LB: 1.1173 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.63e-05 +[2026-04-25 21:02:00] Epoch 1 | Step 4450 | Loss: 0.2992 | LM: 0.2926 | LB: 1.1173 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.49e-05 +[2026-04-25 21:02:07] Epoch 1 | Step 4460 | Loss: 0.2993 | LM: 0.2926 | LB: 1.1172 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.35e-05 +[2026-04-25 21:02:14] Epoch 1 | Step 4470 | Loss: 0.2992 | LM: 0.2925 | LB: 1.1171 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.20e-05 +[2026-04-25 21:02:21] Epoch 1 | Step 4480 | Loss: 0.2991 | LM: 0.2924 | LB: 1.1171 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 5.06e-05 +[2026-04-25 21:02:29] Epoch 1 | Step 4490 | Loss: 0.2990 | LM: 0.2924 | LB: 1.1170 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.92e-05 +[2026-04-25 21:02:36] Epoch 1 | Step 4500 | Loss: 0.2989 | LM: 0.2923 | LB: 1.1170 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.78e-05 +[2026-04-25 21:02:43] Epoch 1 | Step 4510 | Loss: 0.2988 | LM: 0.2922 | LB: 1.1170 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.64e-05 +[2026-04-25 21:02:50] Epoch 1 | Step 4520 | Loss: 0.2988 | LM: 0.2922 | LB: 1.1169 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.50e-05 +[2026-04-25 21:02:57] Epoch 1 | Step 4530 | Loss: 0.2987 | LM: 0.2920 | LB: 1.1169 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.36e-05 +[2026-04-25 21:03:04] Epoch 1 | Step 4540 | Loss: 0.2987 | LM: 0.2921 | LB: 1.1168 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.22e-05 +[2026-04-25 21:03:11] Epoch 1 | Step 4550 | Loss: 0.2986 | LM: 0.2920 | LB: 1.1168 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.438/SR1: 0.413 | LR: 4.08e-05 +[2026-04-25 21:03:18] Epoch 1 | Step 4560 | Loss: 0.2986 | LM: 0.2920 | LB: 1.1168 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.413 | LR: 3.95e-05 +[2026-04-25 21:03:25] Epoch 1 | Step 4570 | Loss: 0.2986 | LM: 0.2919 | LB: 1.1167 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.413 | LR: 3.82e-05 +[2026-04-25 21:03:32] Epoch 1 | Step 4580 | Loss: 0.2986 | LM: 0.2919 | LB: 1.1167 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.413 | LR: 3.68e-05 +[2026-04-25 21:03:39] Epoch 1 | Step 4590 | Loss: 0.2985 | LM: 0.2919 | LB: 1.1167 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.413 | LR: 3.55e-05 +[2026-04-25 21:03:46] Epoch 1 | Step 4600 | Loss: 0.2985 | LM: 0.2918 | LB: 1.1167 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 3.43e-05 +[2026-04-25 21:03:54] Epoch 1 | Step 4610 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1166 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 3.30e-05 +[2026-04-25 21:04:01] Epoch 1 | Step 4620 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1166 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 3.18e-05 +[2026-04-25 21:04:08] Epoch 1 | Step 4630 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1165 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 3.05e-05 +[2026-04-25 21:04:15] Epoch 1 | Step 4640 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1165 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.94e-05 +[2026-04-25 21:04:22] Epoch 1 | Step 4650 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1165 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.82e-05 +[2026-04-25 21:04:29] Epoch 1 | Step 4660 | Loss: 0.2984 | LM: 0.2915 | LB: 1.1164 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.71e-05 +[2026-04-25 21:04:36] Epoch 1 | Step 4670 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1164 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.60e-05 +[2026-04-25 21:04:43] Epoch 1 | Step 4680 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1163 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.49e-05 +[2026-04-25 21:04:50] Epoch 1 | Step 4690 | Loss: 0.2984 | LM: 0.2915 | LB: 1.1163 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.38e-05 +[2026-04-25 21:04:57] Epoch 1 | Step 4700 | Loss: 0.2984 | LM: 0.2916 | LB: 1.1162 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.28e-05 +[2026-04-25 21:05:04] Epoch 1 | Step 4710 | Loss: 0.2984 | LM: 0.2915 | LB: 1.1162 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.18e-05 +[2026-04-25 21:05:12] Epoch 1 | Step 4720 | Loss: 0.2983 | LM: 0.2915 | LB: 1.1162 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.09e-05 +[2026-04-25 21:05:19] Epoch 1 | Step 4730 | Loss: 0.2983 | LM: 0.2915 | LB: 1.1161 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 2.00e-05 +[2026-04-25 21:05:26] Epoch 1 | Step 4740 | Loss: 0.2983 | LM: 0.2915 | LB: 1.1161 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.91e-05 +[2026-04-25 21:05:33] Epoch 1 | Step 4750 | Loss: 0.2983 | LM: 0.2914 | LB: 1.1160 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.82e-05 +[2026-04-25 21:05:40] Epoch 1 | Step 4760 | Loss: 0.2983 | LM: 0.2913 | LB: 1.1160 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.74e-05 +[2026-04-25 21:05:47] Epoch 1 | Step 4770 | Loss: 0.2982 | LM: 0.2911 | LB: 1.1160 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.67e-05 +[2026-04-25 21:05:54] Epoch 1 | Step 4780 | Loss: 0.2982 | LM: 0.2912 | LB: 1.1160 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.59e-05 +[2026-04-25 21:06:01] Epoch 1 | Step 4790 | Loss: 0.2982 | LM: 0.2911 | LB: 1.1159 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.52e-05 +[2026-04-25 21:06:08] Epoch 1 | Step 4800 | Loss: 0.2981 | LM: 0.2910 | LB: 1.1159 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.46e-05 +[2026-04-25 21:06:16] Epoch 1 | Step 4810 | Loss: 0.2981 | LM: 0.2910 | LB: 1.1158 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.40e-05 +[2026-04-25 21:06:23] Epoch 1 | Step 4820 | Loss: 0.2980 | LM: 0.2908 | LB: 1.1158 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.412 | LR: 1.34e-05 +[2026-04-25 21:06:30] Epoch 1 | Step 4830 | Loss: 0.2979 | LM: 0.2906 | LB: 1.1158 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.411 | LR: 1.29e-05 +[2026-04-25 21:06:37] Epoch 1 | Step 4840 | Loss: 0.2979 | LM: 0.2906 | LB: 1.1157 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.24e-05 +[2026-04-25 21:06:44] Epoch 1 | Step 4850 | Loss: 0.2979 | LM: 0.2907 | LB: 1.1157 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.20e-05 +[2026-04-25 21:06:51] Epoch 1 | Step 4860 | Loss: 0.2980 | LM: 0.2906 | LB: 1.1156 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.16e-05 +[2026-04-25 21:06:58] Epoch 1 | Step 4870 | Loss: 0.2980 | LM: 0.2906 | LB: 1.1156 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.12e-05 +[2026-04-25 21:07:05] Epoch 1 | Step 4880 | Loss: 0.2980 | LM: 0.2907 | LB: 1.1156 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.09e-05 +[2026-04-25 21:07:12] Epoch 1 | Step 4890 | Loss: 0.2979 | LM: 0.2907 | LB: 1.1155 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.06e-05 +[2026-04-25 21:07:19] Epoch 1 | Step 4900 | Loss: 0.2979 | LM: 0.2906 | LB: 1.1155 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.04e-05 +[2026-04-25 21:07:26] Epoch 1 | Step 4910 | Loss: 0.2979 | LM: 0.2905 | LB: 1.1155 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.03e-05 +[2026-04-25 21:07:34] Epoch 1 | Step 4920 | Loss: 0.2979 | LM: 0.2905 | LB: 1.1154 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.01e-05 +[2026-04-25 21:07:41] Epoch 1 | Step 4930 | Loss: 0.2978 | LM: 0.2904 | LB: 1.1154 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:07:48] Epoch 1 | Step 4940 | Loss: 0.2978 | LM: 0.2905 | LB: 1.1153 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:07:55] Epoch 1 | Step 4950 | Loss: 0.2978 | LM: 0.2905 | LB: 1.1153 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:02] Epoch 1 | Step 4960 | Loss: 0.2978 | LM: 0.2905 | LB: 1.1153 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:09] Epoch 1 | Step 4970 | Loss: 0.2978 | LM: 0.2904 | LB: 1.1152 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:16] Epoch 1 | Step 4980 | Loss: 0.2977 | LM: 0.2905 | LB: 1.1152 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:23] Epoch 1 | Step 4990 | Loss: 0.2977 | LM: 0.2904 | LB: 1.1151 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:31] Epoch 1 | Step 5000 | Loss: 0.2978 | LM: 0.2905 | LB: 1.1151 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:38] Epoch 1 | Step 5010 | Loss: 0.2977 | LM: 0.2905 | LB: 1.1151 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:45] Epoch 1 | Step 5020 | Loss: 0.2976 | LM: 0.2903 | LB: 1.1151 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:52] Epoch 1 | Step 5030 | Loss: 0.2976 | LM: 0.2903 | LB: 1.1150 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:08:59] Epoch 1 | Step 5040 | Loss: 0.2976 | LM: 0.2901 | LB: 1.1150 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:09:06] Epoch 1 | Step 5050 | Loss: 0.2976 | LM: 0.2902 | LB: 1.1150 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:09:13] Epoch 1 | Step 5060 | Loss: 0.2976 | LM: 0.2901 | LB: 1.1149 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.411 | LR: 1.00e-05 +[2026-04-25 21:09:20] Epoch 1 | Step 5070 | Loss: 0.2976 | LM: 0.2900 | LB: 1.1149 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:09:27] Epoch 1 | Step 5080 | Loss: 0.2976 | LM: 0.2900 | LB: 1.1149 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:09:35] Epoch 1 | Step 5090 | Loss: 0.2976 | LM: 0.2900 | LB: 1.1148 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:09:42] Epoch 1 | Step 5100 | Loss: 0.2975 | LM: 0.2899 | LB: 1.1148 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:09:49] Epoch 1 | Step 5110 | Loss: 0.2975 | LM: 0.2900 | LB: 1.1147 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:09:56] Epoch 1 | Step 5120 | Loss: 0.2975 | LM: 0.2900 | LB: 1.1147 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:03] Epoch 1 | Step 5130 | Loss: 0.2974 | LM: 0.2899 | LB: 1.1147 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.436/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:10] Epoch 1 | Step 5140 | Loss: 0.2974 | LM: 0.2899 | LB: 1.1146 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:17] Epoch 1 | Step 5150 | Loss: 0.2974 | LM: 0.2899 | LB: 1.1146 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:24] Epoch 1 | Step 5160 | Loss: 0.2973 | LM: 0.2898 | LB: 1.1145 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:31] Epoch 1 | Step 5170 | Loss: 0.2972 | LM: 0.2898 | LB: 1.1145 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:38] Epoch 1 | Step 5180 | Loss: 0.2972 | LM: 0.2898 | LB: 1.1145 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:45] Epoch 1 | Step 5190 | Loss: 0.2972 | LM: 0.2898 | LB: 1.1144 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:52] Epoch 1 | Step 5200 | Loss: 0.2972 | LM: 0.2898 | LB: 1.1144 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:10:59] Epoch 1 | Step 5210 | Loss: 0.2972 | LM: 0.2897 | LB: 1.1143 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:06] Epoch 1 | Step 5220 | Loss: 0.2972 | LM: 0.2896 | LB: 1.1143 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:13] Epoch 1 | Step 5230 | Loss: 0.2972 | LM: 0.2895 | LB: 1.1143 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:20] Epoch 1 | Step 5240 | Loss: 0.2972 | LM: 0.2894 | LB: 1.1142 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:28] Epoch 1 | Step 5250 | Loss: 0.2972 | LM: 0.2895 | LB: 1.1142 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:35] Epoch 1 | Step 5260 | Loss: 0.2971 | LM: 0.2895 | LB: 1.1142 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:42] Epoch 1 | Step 5270 | Loss: 0.2971 | LM: 0.2894 | LB: 1.1142 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:49] Epoch 1 | Step 5280 | Loss: 0.2970 | LM: 0.2893 | LB: 1.1141 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:11:56] Epoch 1 | Step 5290 | Loss: 0.2970 | LM: 0.2892 | LB: 1.1141 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:12:03] Epoch 1 | Step 5300 | Loss: 0.2970 | LM: 0.2891 | LB: 1.1141 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:12:10] Epoch 1 | Step 5310 | Loss: 0.2970 | LM: 0.2892 | LB: 1.1141 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:12:17] Epoch 1 | Step 5320 | Loss: 0.2970 | LM: 0.2891 | LB: 1.1140 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.410 | LR: 1.00e-05 +[2026-04-25 21:12:24] Epoch 1 | Step 5330 | Loss: 0.2970 | LM: 0.2891 | LB: 1.1140 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:12:31] Epoch 1 | Step 5340 | Loss: 0.2969 | LM: 0.2891 | LB: 1.1140 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:12:38] Epoch 1 | Step 5350 | Loss: 0.2969 | LM: 0.2890 | LB: 1.1139 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:12:45] Epoch 1 | Step 5360 | Loss: 0.2969 | LM: 0.2890 | LB: 1.1139 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:12:52] Epoch 1 | Step 5370 | Loss: 0.2969 | LM: 0.2889 | LB: 1.1138 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:12:59] Epoch 1 | Step 5380 | Loss: 0.2968 | LM: 0.2888 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:06] Epoch 1 | Step 5390 | Loss: 0.2967 | LM: 0.2887 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:14] Epoch 1 | Step 5400 | Loss: 0.2967 | LM: 0.2887 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:21] Epoch 1 | Step 5410 | Loss: 0.2967 | LM: 0.2886 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:28] Epoch 1 | Step 5420 | Loss: 0.2966 | LM: 0.2885 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.435/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:35] Epoch 1 | Step 5430 | Loss: 0.2966 | LM: 0.2885 | LB: 1.1136 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:42] Epoch 1 | Step 5440 | Loss: 0.2966 | LM: 0.2885 | LB: 1.1136 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:49] Epoch 1 | Step 5450 | Loss: 0.2967 | LM: 0.2884 | LB: 1.1135 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:13:56] Epoch 1 | Step 5460 | Loss: 0.2966 | LM: 0.2884 | LB: 1.1135 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:03] Epoch 1 | Step 5470 | Loss: 0.2966 | LM: 0.2884 | LB: 1.1135 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:10] Epoch 1 | Step 5480 | Loss: 0.2966 | LM: 0.2882 | LB: 1.1134 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:17] Epoch 1 | Step 5490 | Loss: 0.2966 | LM: 0.2882 | LB: 1.1134 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:24] Epoch 1 | Step 5500 | Loss: 0.2966 | LM: 0.2884 | LB: 1.1133 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:31] Epoch 1 | Step 5510 | Loss: 0.2966 | LM: 0.2885 | LB: 1.1133 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:38] Epoch 1 | Step 5520 | Loss: 0.2966 | LM: 0.2884 | LB: 1.1133 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:45] Epoch 1 | Step 5530 | Loss: 0.2965 | LM: 0.2884 | LB: 1.1132 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:52] Epoch 1 | Step 5540 | Loss: 0.2965 | LM: 0.2882 | LB: 1.1132 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:14:59] Epoch 1 | Step 5550 | Loss: 0.2964 | LM: 0.2882 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:15:07] Epoch 1 | Step 5560 | Loss: 0.2964 | LM: 0.2883 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:15:14] Epoch 1 | Step 5570 | Loss: 0.2965 | LM: 0.2883 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:15:21] Epoch 1 | Step 5580 | Loss: 0.2965 | LM: 0.2882 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.409 | LR: 1.00e-05 +[2026-04-25 21:15:28] Epoch 1 | Step 5590 | Loss: 0.2964 | LM: 0.2883 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:15:35] Epoch 1 | Step 5600 | Loss: 0.2965 | LM: 0.2882 | LB: 1.1130 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:15:42] Epoch 1 | Step 5610 | Loss: 0.2965 | LM: 0.2883 | LB: 1.1130 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:15:49] Epoch 1 | Step 5620 | Loss: 0.2964 | LM: 0.2882 | LB: 1.1130 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:15:56] Epoch 1 | Step 5630 | Loss: 0.2964 | LM: 0.2882 | LB: 1.1130 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:03] Epoch 1 | Step 5640 | Loss: 0.2964 | LM: 0.2884 | LB: 1.1129 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:10] Epoch 1 | Step 5650 | Loss: 0.2964 | LM: 0.2884 | LB: 1.1129 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:17] Epoch 1 | Step 5660 | Loss: 0.2964 | LM: 0.2883 | LB: 1.1129 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:24] Epoch 1 | Step 5670 | Loss: 0.2964 | LM: 0.2883 | LB: 1.1129 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:31] Epoch 1 | Step 5680 | Loss: 0.2963 | LM: 0.2882 | LB: 1.1128 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:38] Epoch 1 | Step 5690 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1128 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:45] Epoch 1 | Step 5700 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:16:52] Epoch 1 | Step 5710 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:00] Epoch 1 | Step 5720 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:07] Epoch 1 | Step 5730 | Loss: 0.2963 | LM: 0.2885 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:14] Epoch 1 | Step 5740 | Loss: 0.2963 | LM: 0.2884 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:21] Epoch 1 | Step 5750 | Loss: 0.2963 | LM: 0.2884 | LB: 1.1126 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:28] Epoch 1 | Step 5760 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1126 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:35] Epoch 1 | Step 5770 | Loss: 0.2964 | LM: 0.2884 | LB: 1.1126 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.434/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:42] Epoch 1 | Step 5780 | Loss: 0.2963 | LM: 0.2884 | LB: 1.1125 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:49] Epoch 1 | Step 5790 | Loss: 0.2964 | LM: 0.2882 | LB: 1.1125 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:17:56] Epoch 1 | Step 5800 | Loss: 0.2964 | LM: 0.2883 | LB: 1.1125 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:03] Epoch 1 | Step 5810 | Loss: 0.2964 | LM: 0.2884 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:10] Epoch 1 | Step 5820 | Loss: 0.2963 | LM: 0.2882 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:17] Epoch 1 | Step 5830 | Loss: 0.2963 | LM: 0.2882 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:24] Epoch 1 | Step 5840 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:31] Epoch 1 | Step 5850 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:38] Epoch 1 | Step 5860 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1123 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:45] Epoch 1 | Step 5870 | Loss: 0.2962 | LM: 0.2881 | LB: 1.1123 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:52] Epoch 1 | Step 5880 | Loss: 0.2963 | LM: 0.2883 | LB: 1.1123 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:18:59] Epoch 1 | Step 5890 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1123 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:06] Epoch 1 | Step 5900 | Loss: 0.2962 | LM: 0.2881 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:13] Epoch 1 | Step 5910 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:20] Epoch 1 | Step 5920 | Loss: 0.2961 | LM: 0.2881 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:27] Epoch 1 | Step 5930 | Loss: 0.2962 | LM: 0.2881 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:34] Epoch 1 | Step 5940 | Loss: 0.2961 | LM: 0.2882 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.408 | LR: 1.00e-05 +[2026-04-25 21:19:41] Epoch 1 | Step 5950 | Loss: 0.2961 | LM: 0.2881 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:19:48] Epoch 1 | Step 5960 | Loss: 0.2961 | LM: 0.2881 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:19:55] Epoch 1 | Step 5970 | Loss: 0.2962 | LM: 0.2881 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:02] Epoch 1 | Step 5980 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:09] Epoch 1 | Step 5990 | Loss: 0.2962 | LM: 0.2883 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:17] Epoch 1 | Step 6000 | Loss: 0.2962 | LM: 0.2882 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:17] Validation | Batch 10/42 | Loss: 0.2761 | LM_LOSS: 0.2652 | LB_LOSS: 1.0908 +[2026-04-25 21:20:19] Validation | Batch 20/42 | Loss: 0.2970 | LM_LOSS: 0.2861 | LB_LOSS: 1.0943 +[2026-04-25 21:20:20] Validation | Batch 30/42 | Loss: 0.2894 | LM_LOSS: 0.2785 | LB_LOSS: 1.0916 +[2026-04-25 21:20:21] Validation | Batch 40/42 | Loss: 0.2931 | LM_LOSS: 0.2822 | LB_LOSS: 1.0908 +[2026-04-25 21:20:21] Validation | Batch 42/42 | Loss: 0.2926 | LM_LOSS: 0.2817 | LB_LOSS: 1.0911 +[2026-04-25 21:20:21] Validation | Loss: 0.2926 | LM_LOSS: 0.2817 | LB_LOSS: 1.0911 | PPL: 1.32 | Time: 4.74s +[2026-04-25 21:20:24] New best model saved! Val loss: 0.2926 +[2026-04-25 21:20:31] Epoch 1 | Step 6010 | Loss: 0.2962 | LM: 0.2881 | LB: 1.1120 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:38] Epoch 1 | Step 6020 | Loss: 0.2962 | LM: 0.2880 | LB: 1.1120 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:45] Epoch 1 | Step 6030 | Loss: 0.2962 | LM: 0.2880 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:52] Epoch 1 | Step 6040 | Loss: 0.2962 | LM: 0.2880 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:20:59] Epoch 1 | Step 6050 | Loss: 0.2963 | LM: 0.2881 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:06] Epoch 1 | Step 6060 | Loss: 0.2963 | LM: 0.2881 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:13] Epoch 1 | Step 6070 | Loss: 0.2962 | LM: 0.2880 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:20] Epoch 1 | Step 6080 | Loss: 0.2962 | LM: 0.2879 | LB: 1.1118 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:27] Epoch 1 | Step 6090 | Loss: 0.2962 | LM: 0.2879 | LB: 1.1118 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:34] Epoch 1 | Step 6100 | Loss: 0.2963 | LM: 0.2880 | LB: 1.1117 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:41] Epoch 1 | Step 6110 | Loss: 0.2963 | LM: 0.2880 | LB: 1.1117 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:48] Epoch 1 | Step 6120 | Loss: 0.2963 | LM: 0.2880 | LB: 1.1117 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:21:55] Epoch 1 | Step 6130 | Loss: 0.2962 | LM: 0.2879 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:02] Epoch 1 | Step 6140 | Loss: 0.2961 | LM: 0.2878 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:09] Epoch 1 | Step 6150 | Loss: 0.2961 | LM: 0.2878 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:17] Epoch 1 | Step 6160 | Loss: 0.2960 | LM: 0.2877 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:24] Epoch 1 | Step 6170 | Loss: 0.2961 | LM: 0.2877 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:31] Epoch 1 | Step 6180 | Loss: 0.2960 | LM: 0.2878 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:38] Epoch 1 | Step 6190 | Loss: 0.2959 | LM: 0.2878 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:45] Epoch 1 | Step 6200 | Loss: 0.2959 | LM: 0.2877 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:52] Epoch 1 | Step 6210 | Loss: 0.2959 | LM: 0.2876 | LB: 1.1115 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:22:59] Epoch 1 | Step 6220 | Loss: 0.2960 | LM: 0.2877 | LB: 1.1115 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:06] Epoch 1 | Step 6230 | Loss: 0.2959 | LM: 0.2877 | LB: 1.1115 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:13] Epoch 1 | Step 6240 | Loss: 0.2959 | LM: 0.2877 | LB: 1.1115 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.433/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:20] Epoch 1 | Step 6250 | Loss: 0.2959 | LM: 0.2878 | LB: 1.1114 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:27] Epoch 1 | Step 6260 | Loss: 0.2958 | LM: 0.2878 | LB: 1.1114 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:34] Epoch 1 | Step 6270 | Loss: 0.2958 | LM: 0.2877 | LB: 1.1114 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:41] Epoch 1 | Step 6280 | Loss: 0.2957 | LM: 0.2875 | LB: 1.1114 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:48] Epoch 1 | Step 6290 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1113 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:23:55] Epoch 1 | Step 6300 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1113 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:24:02] Epoch 1 | Step 6310 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1113 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.407 | LR: 1.00e-05 +[2026-04-25 21:24:09] Epoch 1 | Step 6320 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:16] Epoch 1 | Step 6330 | Loss: 0.2958 | LM: 0.2877 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:23] Epoch 1 | Step 6340 | Loss: 0.2958 | LM: 0.2877 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:30] Epoch 1 | Step 6350 | Loss: 0.2958 | LM: 0.2878 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:37] Epoch 1 | Step 6360 | Loss: 0.2958 | LM: 0.2879 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:44] Epoch 1 | Step 6370 | Loss: 0.2958 | LM: 0.2879 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:51] Epoch 1 | Step 6380 | Loss: 0.2958 | LM: 0.2878 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:24:59] Epoch 1 | Step 6390 | Loss: 0.2958 | LM: 0.2877 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:06] Epoch 1 | Step 6400 | Loss: 0.2958 | LM: 0.2877 | LB: 1.1110 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:13] Epoch 1 | Step 6410 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1110 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:20] Epoch 1 | Step 6420 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1110 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:27] Epoch 1 | Step 6430 | Loss: 0.2957 | LM: 0.2876 | LB: 1.1110 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:34] Epoch 1 | Step 6440 | Loss: 0.2957 | LM: 0.2875 | LB: 1.1110 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:41] Epoch 1 | Step 6450 | Loss: 0.2957 | LM: 0.2875 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:48] Epoch 1 | Step 6460 | Loss: 0.2956 | LM: 0.2874 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:25:55] Epoch 1 | Step 6470 | Loss: 0.2956 | LM: 0.2873 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:02] Epoch 1 | Step 6480 | Loss: 0.2956 | LM: 0.2873 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:09] Epoch 1 | Step 6490 | Loss: 0.2957 | LM: 0.2874 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:16] Epoch 1 | Step 6500 | Loss: 0.2956 | LM: 0.2873 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:24] Epoch 1 | Step 6510 | Loss: 0.2956 | LM: 0.2873 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:31] Epoch 1 | Step 6520 | Loss: 0.2955 | LM: 0.2874 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:38] Epoch 1 | Step 6530 | Loss: 0.2954 | LM: 0.2872 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:45] Epoch 1 | Step 6540 | Loss: 0.2954 | LM: 0.2873 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:52] Epoch 1 | Step 6550 | Loss: 0.2954 | LM: 0.2871 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:26:59] Epoch 1 | Step 6560 | Loss: 0.2953 | LM: 0.2870 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:06] Epoch 1 | Step 6570 | Loss: 0.2953 | LM: 0.2871 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:13] Epoch 1 | Step 6580 | Loss: 0.2954 | LM: 0.2872 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:20] Epoch 1 | Step 6590 | Loss: 0.2953 | LM: 0.2872 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:27] Epoch 1 | Step 6600 | Loss: 0.2953 | LM: 0.2871 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:34] Epoch 1 | Step 6610 | Loss: 0.2953 | LM: 0.2870 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:41] Epoch 1 | Step 6620 | Loss: 0.2953 | LM: 0.2869 | LB: 1.1105 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:48] Epoch 1 | Step 6630 | Loss: 0.2953 | LM: 0.2868 | LB: 1.1105 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:27:56] Epoch 1 | Step 6640 | Loss: 0.2953 | LM: 0.2869 | LB: 1.1105 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:03] Epoch 1 | Step 6650 | Loss: 0.2953 | LM: 0.2869 | LB: 1.1105 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:10] Epoch 1 | Step 6660 | Loss: 0.2952 | LM: 0.2868 | LB: 1.1104 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:17] Epoch 1 | Step 6670 | Loss: 0.2952 | LM: 0.2867 | LB: 1.1104 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.432/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:24] Epoch 1 | Step 6680 | Loss: 0.2952 | LM: 0.2866 | LB: 1.1104 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:31] Epoch 1 | Step 6690 | Loss: 0.2952 | LM: 0.2866 | LB: 1.1103 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:38] Epoch 1 | Step 6700 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1103 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.406 | LR: 1.00e-05 +[2026-04-25 21:28:45] Epoch 1 | Step 6710 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1103 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:28:52] Epoch 1 | Step 6720 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1103 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:28:59] Epoch 1 | Step 6730 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:07] Epoch 1 | Step 6740 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:14] Epoch 1 | Step 6750 | Loss: 0.2951 | LM: 0.2865 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:21] Epoch 1 | Step 6760 | Loss: 0.2952 | LM: 0.2866 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:28] Epoch 1 | Step 6770 | Loss: 0.2951 | LM: 0.2865 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:35] Epoch 1 | Step 6780 | Loss: 0.2951 | LM: 0.2865 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:42] Epoch 1 | Step 6790 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:49] Epoch 1 | Step 6800 | Loss: 0.2952 | LM: 0.2865 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:29:56] Epoch 1 | Step 6810 | Loss: 0.2953 | LM: 0.2864 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:03] Epoch 1 | Step 6820 | Loss: 0.2953 | LM: 0.2864 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:10] Epoch 1 | Step 6830 | Loss: 0.2954 | LM: 0.2865 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:18] Epoch 1 | Step 6840 | Loss: 0.2954 | LM: 0.2865 | LB: 1.1100 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:25] Epoch 1 | Step 6850 | Loss: 0.2954 | LM: 0.2865 | LB: 1.1100 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:32] Epoch 1 | Step 6860 | Loss: 0.2954 | LM: 0.2865 | LB: 1.1100 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:39] Epoch 1 | Step 6870 | Loss: 0.2954 | LM: 0.2865 | LB: 1.1100 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:46] Epoch 1 | Step 6880 | Loss: 0.2954 | LM: 0.2864 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:30:53] Epoch 1 | Step 6890 | Loss: 0.2954 | LM: 0.2864 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:00] Epoch 1 | Step 6900 | Loss: 0.2954 | LM: 0.2864 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:07] Epoch 1 | Step 6910 | Loss: 0.2954 | LM: 0.2863 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:14] Epoch 1 | Step 6920 | Loss: 0.2953 | LM: 0.2863 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:22] Epoch 1 | Step 6930 | Loss: 0.2953 | LM: 0.2863 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:29] Epoch 1 | Step 6940 | Loss: 0.2953 | LM: 0.2862 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:36] Epoch 1 | Step 6950 | Loss: 0.2952 | LM: 0.2862 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:43] Epoch 1 | Step 6960 | Loss: 0.2953 | LM: 0.2862 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:50] Epoch 1 | Step 6970 | Loss: 0.2952 | LM: 0.2862 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:31:57] Epoch 1 | Step 6980 | Loss: 0.2952 | LM: 0.2863 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:04] Epoch 1 | Step 6990 | Loss: 0.2951 | LM: 0.2862 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:11] Epoch 1 | Step 7000 | Loss: 0.2951 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:18] Epoch 1 | Step 7010 | Loss: 0.2951 | LM: 0.2862 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:25] Epoch 1 | Step 7020 | Loss: 0.2951 | LM: 0.2864 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:32] Epoch 1 | Step 7030 | Loss: 0.2951 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:39] Epoch 1 | Step 7040 | Loss: 0.2951 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:47] Epoch 1 | Step 7050 | Loss: 0.2950 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:32:54] Epoch 1 | Step 7060 | Loss: 0.2950 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:01] Epoch 1 | Step 7070 | Loss: 0.2950 | LM: 0.2863 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:08] Epoch 1 | Step 7080 | Loss: 0.2950 | LM: 0.2863 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:15] Epoch 1 | Step 7090 | Loss: 0.2950 | LM: 0.2865 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:22] Epoch 1 | Step 7100 | Loss: 0.2950 | LM: 0.2862 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:29] Epoch 1 | Step 7110 | Loss: 0.2949 | LM: 0.2862 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:36] Epoch 1 | Step 7120 | Loss: 0.2950 | LM: 0.2861 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:43] Epoch 1 | Step 7130 | Loss: 0.2950 | LM: 0.2861 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:51] Epoch 1 | Step 7140 | Loss: 0.2949 | LM: 0.2861 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:33:58] Epoch 1 | Step 7150 | Loss: 0.2950 | LM: 0.2860 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:34:05] Epoch 1 | Step 7160 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:34:12] Epoch 1 | Step 7170 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:34:19] Epoch 1 | Step 7180 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.405 | LR: 1.00e-05 +[2026-04-25 21:34:26] Epoch 1 | Step 7190 | Loss: 0.2950 | LM: 0.2860 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:34:33] Epoch 1 | Step 7200 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:34:40] Epoch 1 | Step 7210 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:34:47] Epoch 1 | Step 7220 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:34:54] Epoch 1 | Step 7230 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1094 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:01] Epoch 1 | Step 7240 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1094 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:09] Epoch 1 | Step 7250 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1094 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.431/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:16] Epoch 1 | Step 7260 | Loss: 0.2949 | LM: 0.2857 | LB: 1.1094 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:23] Epoch 1 | Step 7270 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:30] Epoch 1 | Step 7280 | Loss: 0.2950 | LM: 0.2858 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:37] Epoch 1 | Step 7290 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:44] Epoch 1 | Step 7300 | Loss: 0.2949 | LM: 0.2858 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:52] Epoch 1 | Step 7310 | Loss: 0.2949 | LM: 0.2857 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:35:59] Epoch 1 | Step 7320 | Loss: 0.2948 | LM: 0.2858 | LB: 1.1093 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:06] Epoch 1 | Step 7330 | Loss: 0.2948 | LM: 0.2858 | LB: 1.1092 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:13] Epoch 1 | Step 7340 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1092 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:20] Epoch 1 | Step 7350 | Loss: 0.2949 | LM: 0.2860 | LB: 1.1092 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:27] Epoch 1 | Step 7360 | Loss: 0.2949 | LM: 0.2859 | LB: 1.1092 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:34] Epoch 1 | Step 7370 | Loss: 0.2948 | LM: 0.2858 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:41] Epoch 1 | Step 7380 | Loss: 0.2948 | LM: 0.2858 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:48] Epoch 1 | Step 7390 | Loss: 0.2947 | LM: 0.2857 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:36:55] Epoch 1 | Step 7400 | Loss: 0.2947 | LM: 0.2857 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:03] Epoch 1 | Step 7410 | Loss: 0.2947 | LM: 0.2857 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:10] Epoch 1 | Step 7420 | Loss: 0.2948 | LM: 0.2857 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:17] Epoch 1 | Step 7430 | Loss: 0.2948 | LM: 0.2857 | LB: 1.1091 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:24] Epoch 1 | Step 7440 | Loss: 0.2948 | LM: 0.2857 | LB: 1.1090 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:31] Epoch 1 | Step 7450 | Loss: 0.2947 | LM: 0.2855 | LB: 1.1090 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:38] Epoch 1 | Step 7460 | Loss: 0.2947 | LM: 0.2855 | LB: 1.1090 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:45] Epoch 1 | Step 7470 | Loss: 0.2947 | LM: 0.2854 | LB: 1.1090 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:52] Epoch 1 | Step 7480 | Loss: 0.2947 | LM: 0.2853 | LB: 1.1090 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:37:59] Epoch 1 | Step 7490 | Loss: 0.2948 | LM: 0.2854 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:06] Epoch 1 | Step 7500 | Loss: 0.2948 | LM: 0.2854 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:13] Epoch 1 | Step 7510 | Loss: 0.2948 | LM: 0.2853 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:21] Epoch 1 | Step 7520 | Loss: 0.2948 | LM: 0.2853 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:28] Epoch 1 | Step 7530 | Loss: 0.2947 | LM: 0.2853 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:35] Epoch 1 | Step 7540 | Loss: 0.2947 | LM: 0.2852 | LB: 1.1089 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:42] Epoch 1 | Step 7550 | Loss: 0.2947 | LM: 0.2853 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:49] Epoch 1 | Step 7560 | Loss: 0.2947 | LM: 0.2852 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:38:56] Epoch 1 | Step 7570 | Loss: 0.2947 | LM: 0.2852 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:03] Epoch 1 | Step 7580 | Loss: 0.2947 | LM: 0.2852 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:10] Epoch 1 | Step 7590 | Loss: 0.2946 | LM: 0.2851 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:17] Epoch 1 | Step 7600 | Loss: 0.2946 | LM: 0.2852 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:24] Epoch 1 | Step 7610 | Loss: 0.2946 | LM: 0.2853 | LB: 1.1088 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:31] Epoch 1 | Step 7620 | Loss: 0.2945 | LM: 0.2852 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:38] Epoch 1 | Step 7630 | Loss: 0.2945 | LM: 0.2852 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:46] Epoch 1 | Step 7640 | Loss: 0.2945 | LM: 0.2851 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:39:53] Epoch 1 | Step 7650 | Loss: 0.2945 | LM: 0.2850 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:00] Epoch 1 | Step 7660 | Loss: 0.2944 | LM: 0.2850 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:07] Epoch 1 | Step 7670 | Loss: 0.2944 | LM: 0.2850 | LB: 1.1087 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:14] Epoch 1 | Step 7680 | Loss: 0.2943 | LM: 0.2850 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:21] Epoch 1 | Step 7690 | Loss: 0.2944 | LM: 0.2850 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:28] Epoch 1 | Step 7700 | Loss: 0.2943 | LM: 0.2849 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:35] Epoch 1 | Step 7710 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:42] Epoch 1 | Step 7720 | Loss: 0.2944 | LM: 0.2848 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:49] Epoch 1 | Step 7730 | Loss: 0.2944 | LM: 0.2848 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.404 | LR: 1.00e-05 +[2026-04-25 21:40:56] Epoch 1 | Step 7740 | Loss: 0.2944 | LM: 0.2849 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:04] Epoch 1 | Step 7750 | Loss: 0.2944 | LM: 0.2850 | LB: 1.1086 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:11] Epoch 1 | Step 7760 | Loss: 0.2944 | LM: 0.2849 | LB: 1.1085 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:18] Epoch 1 | Step 7770 | Loss: 0.2944 | LM: 0.2848 | LB: 1.1085 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:25] Epoch 1 | Step 7780 | Loss: 0.2943 | LM: 0.2848 | LB: 1.1085 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:32] Epoch 1 | Step 7790 | Loss: 0.2943 | LM: 0.2848 | LB: 1.1085 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:39] Epoch 1 | Step 7800 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1085 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:46] Epoch 1 | Step 7810 | Loss: 0.2944 | LM: 0.2847 | LB: 1.1084 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:41:53] Epoch 1 | Step 7820 | Loss: 0.2943 | LM: 0.2848 | LB: 1.1084 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:00] Epoch 1 | Step 7830 | Loss: 0.2943 | LM: 0.2848 | LB: 1.1084 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:07] Epoch 1 | Step 7840 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1084 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:14] Epoch 1 | Step 7850 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1084 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.430/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:21] Epoch 1 | Step 7860 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1083 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:29] Epoch 1 | Step 7870 | Loss: 0.2941 | LM: 0.2844 | LB: 1.1083 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:36] Epoch 1 | Step 7880 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1083 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:43] Epoch 1 | Step 7890 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1083 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:50] Epoch 1 | Step 7900 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1083 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:42:57] Epoch 1 | Step 7910 | Loss: 0.2942 | LM: 0.2846 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:04] Epoch 1 | Step 7920 | Loss: 0.2942 | LM: 0.2846 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:11] Epoch 1 | Step 7930 | Loss: 0.2942 | LM: 0.2846 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:18] Epoch 1 | Step 7940 | Loss: 0.2943 | LM: 0.2846 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:25] Epoch 1 | Step 7950 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:33] Epoch 1 | Step 7960 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:40] Epoch 1 | Step 7970 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:47] Epoch 1 | Step 7980 | Loss: 0.2943 | LM: 0.2847 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:43:54] Epoch 1 | Step 7990 | Loss: 0.2942 | LM: 0.2847 | LB: 1.1082 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:01] Epoch 1 | Step 8000 | Loss: 0.2942 | LM: 0.2846 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:02] Validation | Batch 10/42 | Loss: 0.2754 | LM_LOSS: 0.2645 | LB_LOSS: 1.0908 +[2026-04-25 21:44:03] Validation | Batch 20/42 | Loss: 0.2965 | LM_LOSS: 0.2855 | LB_LOSS: 1.0943 +[2026-04-25 21:44:04] Validation | Batch 30/42 | Loss: 0.2888 | LM_LOSS: 0.2779 | LB_LOSS: 1.0916 +[2026-04-25 21:44:05] Validation | Batch 40/42 | Loss: 0.2926 | LM_LOSS: 0.2817 | LB_LOSS: 1.0908 +[2026-04-25 21:44:06] Validation | Batch 42/42 | Loss: 0.2922 | LM_LOSS: 0.2813 | LB_LOSS: 1.0911 +[2026-04-25 21:44:06] Validation | Loss: 0.2922 | LM_LOSS: 0.2813 | LB_LOSS: 1.0911 | PPL: 1.32 | Time: 4.74s +[2026-04-25 21:44:09] New best model saved! Val loss: 0.2922 +[2026-04-25 21:44:16] Epoch 1 | Step 8010 | Loss: 0.2942 | LM: 0.2845 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:23] Epoch 1 | Step 8020 | Loss: 0.2941 | LM: 0.2844 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:30] Epoch 1 | Step 8030 | Loss: 0.2941 | LM: 0.2844 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:38] Epoch 1 | Step 8040 | Loss: 0.2942 | LM: 0.2843 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:45] Epoch 1 | Step 8050 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:52] Epoch 1 | Step 8060 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:44:59] Epoch 1 | Step 8070 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:06] Epoch 1 | Step 8080 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:13] Epoch 1 | Step 8090 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:20] Epoch 1 | Step 8100 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1081 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:27] Epoch 1 | Step 8110 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:34] Epoch 1 | Step 8120 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:41] Epoch 1 | Step 8130 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:48] Epoch 1 | Step 8140 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:45:56] Epoch 1 | Step 8150 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:03] Epoch 1 | Step 8160 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:10] Epoch 1 | Step 8170 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1080 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:17] Epoch 1 | Step 8180 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:24] Epoch 1 | Step 8190 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:31] Epoch 1 | Step 8200 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:38] Epoch 1 | Step 8210 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:45] Epoch 1 | Step 8220 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:46:52] Epoch 1 | Step 8230 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1079 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:00] Epoch 1 | Step 8240 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:07] Epoch 1 | Step 8250 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:14] Epoch 1 | Step 8260 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:21] Epoch 1 | Step 8270 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:28] Epoch 1 | Step 8280 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:35] Epoch 1 | Step 8290 | Loss: 0.2942 | LM: 0.2841 | LB: 1.1078 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:42] Epoch 1 | Step 8300 | Loss: 0.2941 | LM: 0.2840 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:49] Epoch 1 | Step 8310 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:47:56] Epoch 1 | Step 8320 | Loss: 0.2941 | LM: 0.2840 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:48:04] Epoch 1 | Step 8330 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.403 | LR: 1.00e-05 +[2026-04-25 21:48:11] Epoch 1 | Step 8340 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:18] Epoch 1 | Step 8350 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:25] Epoch 1 | Step 8360 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1077 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:32] Epoch 1 | Step 8370 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:39] Epoch 1 | Step 8380 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:46] Epoch 1 | Step 8390 | Loss: 0.2941 | LM: 0.2843 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:48:53] Epoch 1 | Step 8400 | Loss: 0.2941 | LM: 0.2843 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:00] Epoch 1 | Step 8410 | Loss: 0.2941 | LM: 0.2843 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:08] Epoch 1 | Step 8420 | Loss: 0.2941 | LM: 0.2843 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:15] Epoch 1 | Step 8430 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:22] Epoch 1 | Step 8440 | Loss: 0.2941 | LM: 0.2842 | LB: 1.1076 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:29] Epoch 1 | Step 8450 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:36] Epoch 1 | Step 8460 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:43] Epoch 1 | Step 8470 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:50] Epoch 1 | Step 8480 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:49:57] Epoch 1 | Step 8490 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:05] Epoch 1 | Step 8500 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:12] Epoch 1 | Step 8510 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:19] Epoch 1 | Step 8520 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1075 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:26] Epoch 1 | Step 8530 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1074 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:33] Epoch 1 | Step 8540 | Loss: 0.2940 | LM: 0.2842 | LB: 1.1074 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:40] Epoch 1 | Step 8550 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1074 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:47] Epoch 1 | Step 8560 | Loss: 0.2940 | LM: 0.2841 | LB: 1.1074 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:50:54] Epoch 1 | Step 8570 | Loss: 0.2941 | LM: 0.2841 | LB: 1.1074 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:01] Epoch 1 | Step 8580 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:08] Epoch 1 | Step 8590 | Loss: 0.2940 | LM: 0.2840 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:15] Epoch 1 | Step 8600 | Loss: 0.2940 | LM: 0.2839 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:22] Epoch 1 | Step 8610 | Loss: 0.2940 | LM: 0.2839 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:29] Epoch 1 | Step 8620 | Loss: 0.2939 | LM: 0.2838 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:36] Epoch 1 | Step 8630 | Loss: 0.2939 | LM: 0.2838 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:43] Epoch 1 | Step 8640 | Loss: 0.2939 | LM: 0.2839 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:51] Epoch 1 | Step 8650 | Loss: 0.2940 | LM: 0.2838 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.429/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:51:58] Epoch 1 | Step 8660 | Loss: 0.2939 | LM: 0.2837 | LB: 1.1073 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:05] Epoch 1 | Step 8670 | Loss: 0.2940 | LM: 0.2838 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:12] Epoch 1 | Step 8680 | Loss: 0.2940 | LM: 0.2837 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:19] Epoch 1 | Step 8690 | Loss: 0.2939 | LM: 0.2837 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:26] Epoch 1 | Step 8700 | Loss: 0.2939 | LM: 0.2837 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:33] Epoch 1 | Step 8710 | Loss: 0.2938 | LM: 0.2836 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:40] Epoch 1 | Step 8720 | Loss: 0.2938 | LM: 0.2836 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:47] Epoch 1 | Step 8730 | Loss: 0.2938 | LM: 0.2837 | LB: 1.1072 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:52:54] Epoch 1 | Step 8740 | Loss: 0.2938 | LM: 0.2837 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:01] Epoch 1 | Step 8750 | Loss: 0.2938 | LM: 0.2837 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:08] Epoch 1 | Step 8760 | Loss: 0.2938 | LM: 0.2837 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:15] Epoch 1 | Step 8770 | Loss: 0.2938 | LM: 0.2837 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:22] Epoch 1 | Step 8780 | Loss: 0.2938 | LM: 0.2838 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:30] Epoch 1 | Step 8790 | Loss: 0.2938 | LM: 0.2838 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:37] Epoch 1 | Step 8800 | Loss: 0.2937 | LM: 0.2838 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:44] Epoch 1 | Step 8810 | Loss: 0.2937 | LM: 0.2837 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:51] Epoch 1 | Step 8820 | Loss: 0.2937 | LM: 0.2838 | LB: 1.1071 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:53:58] Epoch 1 | Step 8830 | Loss: 0.2937 | LM: 0.2839 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:05] Epoch 1 | Step 8840 | Loss: 0.2937 | LM: 0.2838 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:12] Epoch 1 | Step 8850 | Loss: 0.2937 | LM: 0.2838 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:19] Epoch 1 | Step 8860 | Loss: 0.2937 | LM: 0.2838 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:26] Epoch 1 | Step 8870 | Loss: 0.2937 | LM: 0.2839 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:33] Epoch 1 | Step 8880 | Loss: 0.2936 | LM: 0.2839 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:40] Epoch 1 | Step 8890 | Loss: 0.2936 | LM: 0.2839 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:47] Epoch 1 | Step 8900 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:54:54] Epoch 1 | Step 8910 | Loss: 0.2935 | LM: 0.2839 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:02] Epoch 1 | Step 8920 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:09] Epoch 1 | Step 8930 | Loss: 0.2934 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:16] Epoch 1 | Step 8940 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:23] Epoch 1 | Step 8950 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:30] Epoch 1 | Step 8960 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:37] Epoch 1 | Step 8970 | Loss: 0.2935 | LM: 0.2838 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:44] Epoch 1 | Step 8980 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:51] Epoch 1 | Step 8990 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:55:58] Epoch 1 | Step 9000 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:56:06] Epoch 1 | Step 9010 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:56:13] Epoch 1 | Step 9020 | Loss: 0.2934 | LM: 0.2836 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.402 | LR: 1.00e-05 +[2026-04-25 21:56:20] Epoch 1 | Step 9030 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:56:28] Epoch 1 | Step 9040 | Loss: 0.2934 | LM: 0.2836 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:56:36] Epoch 1 | Step 9050 | Loss: 0.2934 | LM: 0.2837 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:56:43] Epoch 1 | Step 9060 | Loss: 0.2934 | LM: 0.2836 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:56:50] Epoch 1 | Step 9070 | Loss: 0.2934 | LM: 0.2836 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:56:58] Epoch 1 | Step 9080 | Loss: 0.2934 | LM: 0.2835 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:05] Epoch 1 | Step 9090 | Loss: 0.2933 | LM: 0.2834 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:12] Epoch 1 | Step 9100 | Loss: 0.2934 | LM: 0.2835 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:20] Epoch 1 | Step 9110 | Loss: 0.2934 | LM: 0.2834 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:27] Epoch 1 | Step 9120 | Loss: 0.2934 | LM: 0.2835 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:34] Epoch 1 | Step 9130 | Loss: 0.2933 | LM: 0.2835 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:42] Epoch 1 | Step 9140 | Loss: 0.2933 | LM: 0.2835 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:49] Epoch 1 | Step 9150 | Loss: 0.2933 | LM: 0.2834 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:57:56] Epoch 1 | Step 9160 | Loss: 0.2933 | LM: 0.2834 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:04] Epoch 1 | Step 9170 | Loss: 0.2932 | LM: 0.2833 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:11] Epoch 1 | Step 9180 | Loss: 0.2932 | LM: 0.2833 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:18] Epoch 1 | Step 9190 | Loss: 0.2932 | LM: 0.2832 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:25] Epoch 1 | Step 9200 | Loss: 0.2932 | LM: 0.2832 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:32] Epoch 1 | Step 9210 | Loss: 0.2932 | LM: 0.2832 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:39] Epoch 1 | Step 9220 | Loss: 0.2932 | LM: 0.2832 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:47] Epoch 1 | Step 9230 | Loss: 0.2931 | LM: 0.2831 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:58:54] Epoch 1 | Step 9240 | Loss: 0.2931 | LM: 0.2831 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:01] Epoch 1 | Step 9250 | Loss: 0.2931 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:08] Epoch 1 | Step 9260 | Loss: 0.2930 | LM: 0.2831 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:15] Epoch 1 | Step 9270 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:23] Epoch 1 | Step 9280 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:30] Epoch 1 | Step 9290 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:37] Epoch 1 | Step 9300 | Loss: 0.2930 | LM: 0.2829 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:44] Epoch 1 | Step 9310 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:51] Epoch 1 | Step 9320 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 21:59:58] Epoch 1 | Step 9330 | Loss: 0.2930 | LM: 0.2830 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:05] Epoch 1 | Step 9340 | Loss: 0.2929 | LM: 0.2829 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:13] Epoch 1 | Step 9350 | Loss: 0.2929 | LM: 0.2829 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:20] Epoch 1 | Step 9360 | Loss: 0.2929 | LM: 0.2828 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:27] Epoch 1 | Step 9370 | Loss: 0.2929 | LM: 0.2828 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:34] Epoch 1 | Step 9380 | Loss: 0.2929 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:41] Epoch 1 | Step 9390 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:48] Epoch 1 | Step 9400 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:00:56] Epoch 1 | Step 9410 | Loss: 0.2928 | LM: 0.2828 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:03] Epoch 1 | Step 9420 | Loss: 0.2929 | LM: 0.2828 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:10] Epoch 1 | Step 9430 | Loss: 0.2929 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:17] Epoch 1 | Step 9440 | Loss: 0.2929 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:24] Epoch 1 | Step 9450 | Loss: 0.2929 | LM: 0.2827 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:31] Epoch 1 | Step 9460 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:39] Epoch 1 | Step 9470 | Loss: 0.2928 | LM: 0.2826 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:46] Epoch 1 | Step 9480 | Loss: 0.2927 | LM: 0.2825 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:01:53] Epoch 1 | Step 9490 | Loss: 0.2928 | LM: 0.2826 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:00] Epoch 1 | Step 9500 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:07] Epoch 1 | Step 9510 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:14] Epoch 1 | Step 9520 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:22] Epoch 1 | Step 9530 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:29] Epoch 1 | Step 9540 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:36] Epoch 1 | Step 9550 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:43] Epoch 1 | Step 9560 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:50] Epoch 1 | Step 9570 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:02:57] Epoch 1 | Step 9580 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:04] Epoch 1 | Step 9590 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:12] Epoch 1 | Step 9600 | Loss: 0.2928 | LM: 0.2827 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:19] Epoch 1 | Step 9610 | Loss: 0.2928 | LM: 0.2826 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:26] Epoch 1 | Step 9620 | Loss: 0.2928 | LM: 0.2826 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:33] Epoch 1 | Step 9630 | Loss: 0.2928 | LM: 0.2826 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:40] Epoch 1 | Step 9640 | Loss: 0.2928 | LM: 0.2825 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:47] Epoch 1 | Step 9650 | Loss: 0.2928 | LM: 0.2825 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:03:54] Epoch 1 | Step 9660 | Loss: 0.2928 | LM: 0.2825 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:02] Epoch 1 | Step 9670 | Loss: 0.2928 | LM: 0.2825 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:09] Epoch 1 | Step 9680 | Loss: 0.2928 | LM: 0.2825 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:16] Epoch 1 | Step 9690 | Loss: 0.2928 | LM: 0.2824 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:23] Epoch 1 | Step 9700 | Loss: 0.2928 | LM: 0.2824 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:31] Epoch 1 | Step 9710 | Loss: 0.2927 | LM: 0.2824 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:38] Epoch 1 | Step 9720 | Loss: 0.2927 | LM: 0.2824 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.355 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:45] Epoch 1 | Step 9730 | Loss: 0.2927 | LM: 0.2824 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:52] Epoch 1 | Step 9740 | Loss: 0.2927 | LM: 0.2824 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:04:59] Epoch 1 | Step 9750 | Loss: 0.2927 | LM: 0.2823 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:07] Epoch 1 | Step 9760 | Loss: 0.2927 | LM: 0.2824 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:14] Epoch 1 | Step 9770 | Loss: 0.2927 | LM: 0.2823 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:21] Epoch 1 | Step 9780 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:28] Epoch 1 | Step 9790 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:35] Epoch 1 | Step 9800 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:42] Epoch 1 | Step 9810 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:50] Epoch 1 | Step 9820 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:05:57] Epoch 1 | Step 9830 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:04] Epoch 1 | Step 9840 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:11] Epoch 1 | Step 9850 | Loss: 0.2926 | LM: 0.2822 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:19] Epoch 1 | Step 9860 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:26] Epoch 1 | Step 9870 | Loss: 0.2926 | LM: 0.2823 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:33] Epoch 1 | Step 9880 | Loss: 0.2926 | LM: 0.2824 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.401 | LR: 1.00e-05 +[2026-04-25 22:06:38] Epoch 1 completed in 7105.89s | Loss: 0.2926 | CL0: 2.8 | CL1: 2.4 +[2026-04-25 22:06:38] +Training completed! +[2026-04-25 22:06:41] Final model: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_1e-4/model_final.pt +wandb: WARNING Fatal error while uploading data. Some run data will not be synced, but it will still be written to disk. Use `wandb sync` at the end of the run to try uploading. \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/wandb-summary.json b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..f47c4158e741a17bb22694d7829185137a217761 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime": 7158, "train/epoch": 1, "train/step_time": 0.7092468976974488, "train/chunk_len_stage0": 2.8179939558054445, "train/chunk_len_stage1": 2.354251368177087, "_step": 9880, "_timestamp": 1777154798.6939964, "train/loss": 0.19837140291929245, "train/lm_loss": 0.22973915934562683, "train/lb_loss": 1.1289136409759521, "train/hard_boundary_ratio_stage0": 0.35757633658743543, "train/hard_boundary_ratio_stage1": 0.42741741229545455, "train/soft_boundary_ratio_stage1": 0.40072747577085055, "train/loss_avg": 0.29260910248965016, "train/lr": 1e-05, "train/soft_boundary_ratio_stage0": 0.3544859938887565, "val/perplexity": 1.321679014796039, "val/lm_loss": 0.2812648575220789, "val/lb_loss": 1.0910536590076627, "best/step": 8000, "val/time": 4.739992141723633, "best/val_perplexity": 1.321679014796039, "val/loss": 0.29217539443856194, "best/val_loss": 0.29217539443856194, "epoch/chunk_len_stage1": 2.3543435574383285, "epoch/soft_boundary_ratio_stage0": 0.35449292086488965, "epoch/soft_boundary_ratio_stage1": 0.4007127851565031, "epoch/lb_loss": 1.1061990606274803, "epoch/lm_loss": 0.28240673256245047, "epoch/chunk_len_stage0": 2.8179205942465053, "epoch/hard_boundary_ratio_stage0": 0.35758569070574475, "epoch/loss": 0.29261703164552394, "epoch/time": 7105.889644384384, "epoch/hard_boundary_ratio_stage1": 0.42740058972624} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/run-d5usyud5.wandb b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/run-d5usyud5.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b7bc920c4a9030b35d9727112ac2ed9629b39dd2 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_1e-4/wandb/run-20260425_200722-d5usyud5/run-d5usyud5.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f8b8e6b5350654f7b0216247b78604c928fa1b5d7dab31aa40755bb6c3f5e02 +size 3080041 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/config.yaml b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c5a0c9eba544ebc6fce07de55ef7777259419bf5 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/config.yaml @@ -0,0 +1,55 @@ +model: + config_path: ${oc.env:PROJECT_ROOT}/hnet_project/configs/hnet_2stage_XL_code.json + checkpoint_path: ${oc.env:PROJECT_ROOT}/hnet_project/checkpoints/hnet_2stage_XL_code.pt +training: + epochs: 1 + batch_size: 4 + eval_batch_size: 24 + gradient_accumulation_steps: 4 + lr: 0.0002 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + lr_multiplier: + - 2.0 + - 1.5 + - 1.0 + load_balancing_weight: 0.01 + load_balancing_N: 4.0 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null + warmup_model: true +data: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 0 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: hnet_xl_code_lr_2e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: ${paths.output_dir} +paths: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 +seed: 42 +device: cuda diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/hydra.yaml b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac11f0243810424d758b47cc764767aab4f0e9a4 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/hydra.yaml @@ -0,0 +1,166 @@ +hydra: + run: + dir: ${paths.output_dir} + sweep: + dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=hnet_xl_code_lr_2e-4 + - training.lr=2e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + job: + name: train + chdir: false + override_dirname: data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full,paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4,tracking.project=code-completion_lr-sweep,tracking.run_name=hnet_xl_code_lr_2e-4,tracking=wandb,training.lr=2e-4 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /workspace/byte-llms-code/code_completion_exp/train_hnet + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /workspace/byte-llms-code/code_completion_exp/train_hnet/configs + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 + choices: + paths: default + tracking: wandb + logging: default + data: default + training: default + model: hnet_xl_code + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/overrides.yaml b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..58bb9db44893fa88f05c44128a36ad2cda36c57b --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/.hydra/overrides.yaml @@ -0,0 +1,6 @@ +- tracking=wandb +- tracking.project=code-completion_lr-sweep +- tracking.run_name=hnet_xl_code_lr_2e-4 +- training.lr=2e-4 +- paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 +- data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug-internal.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..38ebdcd269c96a26557edde1eca568896bcb3ee0 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2026-04-25T22:20:12.017648642Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T22:20:12.401332467Z","level":"INFO","msg":"stream: created new stream","id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401402455Z","level":"INFO","msg":"handler: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401496869Z","level":"INFO","msg":"stream: started","id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401507601Z","level":"INFO","msg":"writer: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.40151694Z","level":"INFO","msg":"sender: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.5289538Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-25T23:20:22.74400376Z","level":"ERROR","msg":"api: HTTP error","status":403,"method":"POST","url":"https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/khn25dwv/file_stream"} +{"time":"2026-04-25T23:20:22.744078123Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 403 Forbidden url=https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/khn25dwv/file_stream: "} +{"time":"2026-04-26T00:19:30.370077692Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-26T00:19:30.370863672Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-26T00:19:30.373711643Z","level":"INFO","msg":"stream: closing","id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.373733071Z","level":"INFO","msg":"handler: closed","stream_id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.37382892Z","level":"INFO","msg":"sender: closed","stream_id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.373836548Z","level":"INFO","msg":"stream: closed","id":"khn25dwv"} diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d2eb782480e3c71d67c702cc65d44a998e99a273 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/debug.log @@ -0,0 +1,24 @@ +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Configure stats pid to 198705 +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug.log +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-internal.log +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():844] calling init triggers +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0002, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'hnet_xl_code_lr_2e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}} +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():892] starting backend +2026-04-25 22:20:11,992 INFO MainThread:198705 [wandb_init.py:init():895] sending inform_init request +2026-04-25 22:20:12,016 INFO MainThread:198705 [wandb_init.py:init():903] backend started and connected +2026-04-25 22:20:12,019 INFO MainThread:198705 [wandb_init.py:init():973] updated telemetry +2026-04-25 22:20:12,040 INFO MainThread:198705 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 22:20:12,528 INFO MainThread:198705 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 22:20:12,686 INFO MainThread:198705 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 22:20:12,688 INFO MainThread:198705 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-26 00:19:29,581 INFO MainThread:198705 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/khn25dwv +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_restore():2476] restore +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_restore():2482] restore done +2026-04-26 00:19:30,373 INFO MainThread:198705 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/code/code_completion_exp/train_hnet/train.py b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/code/code_completion_exp/train_hnet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7c306fe6e62d718f1815d106471a779b413a20 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/code/code_completion_exp/train_hnet/train.py @@ -0,0 +1,284 @@ +""" +Training Pipeline для HNet модели на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=hnet_small + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +from pathlib import Path + +import torch +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# HNet imports +from hnet.load_utils import load_from_pretrained, load_from_config +from hnet.utils.tokenizers import ByteTokenizer +from hnet.utils.train import group_params + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, finish_tracking +from training_lib.hnet.train_loop import train_epoch +from training_lib.hnet.data import create_dataloaders + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Глав��ая функция тренировки с поддержкой DDP чере�� Accelerate.""" + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message( + f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", + cfg, + accelerator, + ) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message( + "HNet Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator + ) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = ByteTokenizer() + + # === Model === + log_message("Loading model...", cfg, accelerator) + if cfg.model.checkpoint_path: + model = load_from_pretrained( + model_path=cfg.model.checkpoint_path, + model_config_path=cfg.model.config_path, + ) + log_message(f"Loaded pretrained: {cfg.model.checkpoint_path}", cfg, accelerator) + else: + model = load_from_config( + model_config_path=cfg.model.config_path, + device="cpu", + ) + model.init_weights() + log_message("Initialized from scratch", cfg, accelerator) + + model.train() + + # LR multiplier для разны�� стадий (до prepare!) + lr_multiplier = list(cfg.training.lr_multiplier) + model.apply_lr_multiplier(lr_multiplier) + log_message(f"Applied LR multipliers: {lr_multiplier}", cfg, accelerator) + + # Warmup для Triton kernels + if cfg.training.warmup_model: + log_message("Warming up model...", cfg, accelerator) + model = model.to(accelerator.device) + model.warmup(verbose=accelerator.is_main_process) + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message( + f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator + ) + log_message( + f"Train batches per epoch (before DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + if val_dataloader: + log_message( + f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator + ) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model) + + for group in param_groups: + if "lr" not in group: + group["lr"] = cfg.training.lr + else: + group["lr"] = cfg.training.lr * group.get("lr_multiplier", 1.0) + if "weight_decay" not in group: + group["weight_decay"] = cfg.training.weight_decay + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil(len(train_dataloader) / accelerator.num_processes) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator, + ) + + # === Accelerate Prepare === + log_message( + "Preparing model, optimizer, and dataloaders with Accelerate...", + cfg, + accelerator, + ) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = ( + accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message( + f"Train batches per epoch (after DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, + optimizer, + scheduler, + cfg.training.resume_checkpoint, + cfg, + accelerator, + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + accelerator.end_training() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/output.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..7cbdb3d3a29480fcaa3c7e95b12f75bc636541d8 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/output.log @@ -0,0 +1,76 @@ +[2026-04-25 22:06:54] Initializing tokenizer... +[2026-04-25 22:06:54] Loading model... +[2026-04-25 22:06:58] Loaded pretrained: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +[2026-04-25 22:06:58] Applied LR multipliers: [2.0, 1.5, 1.0] +[2026-04-25 22:06:58] Warming up model... +[WARMUP] Starting warmup (compiling Triton kernels)... +[WARMUP] Forward: 17.363s, Backward: 26.562s +[WARMUP] Warmup complete. Subsequent passes will be fast. +[2026-04-25 22:07:42] Total params: 1,654,090,112 +[2026-04-25 22:07:42] Trainable params: 1,654,090,112 +[2026-04-25 22:07:42] Creating dataloaders... +[2026-04-25 22:07:42] Train dataset size: 316397 +[2026-04-25 22:07:42] Train batches per epoch (before DDP split): 79100 +[2026-04-25 22:07:42] Validation dataset size: 35098 +[2026-04-25 22:07:42] Validation batches: 1463 +[2026-04-25 22:07:42] Creating optimizer... +[2026-04-25 22:07:42] Total steps: 29662, Steps per epoch: 39550 +[2026-04-25 22:07:42] Preparing model, optimizer, and dataloaders with Accelerate... +[2026-04-25 22:07:43] Train batches per epoch (after DDP split): 39550 +[2026-04-25 22:07:43] Starting training... +[2026-04-25 22:07:43] +============================================================ +[2026-04-25 22:07:43] EPOCH 1/3 +[2026-04-25 22:07:43] ============================================================ +[2026-04-25 22:08:13] Epoch 1 | Step 10 | Loss: 0.6143 | LM: 0.5857 | LB: 1.1576 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.476/SR1: 0.455 | LR: 2.12e-05 +[2026-04-25 22:08:20] Epoch 1 | Step 20 | Loss: 0.5841 | LM: 0.5756 | LB: 1.1555 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.475/SR1: 0.455 | LR: 2.24e-05 +[2026-04-25 22:08:27] Epoch 1 | Step 30 | Loss: 0.5401 | LM: 0.5225 | LB: 1.1531 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.474/SR1: 0.453 | LR: 2.36e-05 +[2026-04-25 22:08:34] Epoch 1 | Step 40 | Loss: 0.5129 | LM: 0.4917 | LB: 1.1617 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.457 | LR: 2.49e-05 +[2026-04-25 22:08:41] Epoch 1 | Step 50 | Loss: 0.4778 | LM: 0.4483 | LB: 1.1610 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.355 | HR1: 0.477/SR1: 0.457 | LR: 2.61e-05 +[2026-04-25 22:08:48] Epoch 1 | Step 60 | Loss: 0.4532 | LM: 0.4247 | LB: 1.1636 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.460 | LR: 2.73e-05 +[2026-04-25 22:08:56] Epoch 1 | Step 70 | Loss: 0.4372 | LM: 0.4149 | LB: 1.1636 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.459 | LR: 2.85e-05 +[2026-04-25 22:09:03] Epoch 1 | Step 80 | Loss: 0.4289 | LM: 0.4151 | LB: 1.1651 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 2.97e-05 +[2026-04-25 22:09:10] Epoch 1 | Step 90 | Loss: 0.4225 | LM: 0.4074 | LB: 1.1657 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 3.09e-05 +[2026-04-25 22:09:17] Epoch 1 | Step 100 | Loss: 0.4147 | LM: 0.4106 | LB: 1.1654 | CL0: 2.8 | CL1: 2.1 | HR0: 0.360/SR0: 0.357 | HR1: 0.480/SR1: 0.459 | LR: 3.21e-05 +[2026-04-25 22:09:24] Epoch 1 | Step 110 | Loss: 0.4128 | LM: 0.4158 | LB: 1.1662 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.356 | HR1: 0.482/SR1: 0.460 | LR: 3.34e-05 +[2026-04-25 22:09:31] Epoch 1 | Step 120 | Loss: 0.4090 | LM: 0.4045 | LB: 1.1655 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.481/SR1: 0.460 | LR: 3.46e-05 +[2026-04-25 22:09:38] Epoch 1 | Step 130 | Loss: 0.4051 | LM: 0.4000 | LB: 1.1654 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.482/SR1: 0.460 | LR: 3.58e-05 +[2026-04-25 22:09:46] Epoch 1 | Step 140 | Loss: 0.4012 | LM: 0.4007 | LB: 1.1660 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.482/SR1: 0.460 | LR: 3.70e-05 +[2026-04-25 22:09:53] Epoch 1 | Step 150 | Loss: 0.3946 | LM: 0.3875 | LB: 1.1656 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.481/SR1: 0.460 | LR: 3.82e-05 +[2026-04-25 22:10:00] Epoch 1 | Step 160 | Loss: 0.3897 | LM: 0.3822 | LB: 1.1664 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.482/SR1: 0.460 | LR: 3.94e-05 +[2026-04-25 22:10:07] Epoch 1 | Step 170 | Loss: 0.3894 | LM: 0.3884 | LB: 1.1641 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.459 | LR: 4.06e-05 +[2026-04-25 22:10:14] Epoch 1 | Step 180 | Loss: 0.3840 | LM: 0.3834 | LB: 1.1633 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.458 | LR: 4.18e-05 +[2026-04-25 22:10:22] Epoch 1 | Step 190 | Loss: 0.3823 | LM: 0.3808 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 4.31e-05 +[2026-04-25 22:10:29] Epoch 1 | Step 200 | Loss: 0.3800 | LM: 0.3834 | LB: 1.1615 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.457 | LR: 4.43e-05 +[2026-04-25 22:10:36] Epoch 1 | Step 210 | Loss: 0.3794 | LM: 0.3856 | LB: 1.1616 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.479/SR1: 0.457 | LR: 4.55e-05 +[2026-04-25 22:10:43] Epoch 1 | Step 220 | Loss: 0.3768 | LM: 0.3858 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.458 | LR: 4.67e-05 +[2026-04-25 22:10:50] Epoch 1 | Step 230 | Loss: 0.3727 | LM: 0.3789 | LB: 1.1626 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 4.79e-05 +[2026-04-25 22:10:57] Epoch 1 | Step 240 | Loss: 0.3705 | LM: 0.3740 | LB: 1.1623 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 4.91e-05 +[2026-04-25 22:11:05] Epoch 1 | Step 250 | Loss: 0.3683 | LM: 0.3685 | LB: 1.1624 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 5.03e-05 +[2026-04-25 22:11:12] Epoch 1 | Step 260 | Loss: 0.3676 | LM: 0.3706 | LB: 1.1626 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.481/SR1: 0.459 | LR: 5.16e-05 +[2026-04-25 22:11:19] Epoch 1 | Step 270 | Loss: 0.3649 | LM: 0.3704 | LB: 1.1618 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.480/SR1: 0.458 | LR: 5.28e-05 +[2026-04-25 22:11:26] Epoch 1 | Step 280 | Loss: 0.3621 | LM: 0.3716 | LB: 1.1620 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.480/SR1: 0.458 | LR: 5.40e-05 +[2026-04-25 22:11:33] Epoch 1 | Step 290 | Loss: 0.3600 | LM: 0.3707 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.458 | LR: 5.52e-05 +[2026-04-25 22:11:40] Epoch 1 | Step 300 | Loss: 0.3591 | LM: 0.3701 | LB: 1.1627 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.480/SR1: 0.458 | LR: 5.64e-05 +[2026-04-25 22:11:47] Epoch 1 | Step 310 | Loss: 0.3576 | LM: 0.3678 | LB: 1.1622 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.457 | LR: 5.76e-05 +[2026-04-25 22:11:55] Epoch 1 | Step 320 | Loss: 0.3549 | LM: 0.3649 | LB: 1.1624 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.457 | LR: 5.88e-05 +[2026-04-25 22:12:02] Epoch 1 | Step 330 | Loss: 0.3530 | LM: 0.3634 | LB: 1.1624 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.457 | LR: 6.01e-05 +[2026-04-25 22:12:09] Epoch 1 | Step 340 | Loss: 0.3509 | LM: 0.3612 | LB: 1.1625 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.457 | LR: 6.13e-05 +[2026-04-25 22:12:16] Epoch 1 | Step 350 | Loss: 0.3499 | LM: 0.3601 | LB: 1.1627 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.458 | LR: 6.25e-05 +[2026-04-25 22:12:23] Epoch 1 | Step 360 | Loss: 0.3473 | LM: 0.3569 | LB: 1.1623 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.480/SR1: 0.457 | LR: 6.37e-05 +[2026-04-25 22:12:30] Epoch 1 | Step 370 | Loss: 0.3450 | LM: 0.3553 | LB: 1.1618 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 6.49e-05 +[2026-04-25 22:12:37] Epoch 1 | Step 380 | Loss: 0.3431 | LM: 0.3544 | LB: 1.1617 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 6.61e-05 +[2026-04-25 22:12:45] Epoch 1 | Step 390 | Loss: 0.3420 | LM: 0.3526 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 6.73e-05 +[2026-04-25 22:12:52] Epoch 1 | Step 400 | Loss: 0.3414 | LM: 0.3520 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 6.86e-05 +[2026-04-25 22:12:59] Epoch 1 | Step 410 | Loss: 0.3401 | LM: 0.3517 | LB: 1.1612 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 6.98e-05 +[2026-04-25 22:13:06] Epoch 1 | Step 420 | Loss: 0.3388 | LM: 0.3508 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 7.10e-05 +[2026-04-25 22:13:13] Epoch 1 | Step 430 | Loss: 0.3385 | LM: 0.3490 | LB: 1.1611 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.456 | LR: 7.22e-05 +[2026-04-25 22:13:20] Epoch 1 | Step 440 | Loss: 0.3366 | LM: 0.3471 | LB: 1.1608 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.456 | LR: 7.34e-05 +[2026-04-25 22:13:27] Epoch 1 | Step 450 | Loss: 0.3359 | LM: 0.3465 | LB: 1.1605 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 7.46e-05 +[2026-04-25 22:13:34] Epoch 1 | Step 460 | Loss: 0.3351 | LM: 0.3451 | LB: 1.1606 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 7.58e-05 +[2026-04-25 22:13:42] Epoch 1 | Step 470 | Loss: 0.3337 | LM: 0.3433 | LB: 1.1603 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 7.70e-05 +[2026-04-25 22:13:49] Epoch 1 | Step 480 | Loss: 0.3327 | LM: 0.3415 | LB: 1.1603 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 7.83e-05 +[2026-04-25 22:13:56] Epoch 1 | Step 490 | Loss: 0.3316 | LM: 0.3398 | LB: 1.1603 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.456 | LR: 7.95e-05 +[2026-04-25 22:14:03] Epoch 1 | Step 500 | Loss: 0.3307 | LM: 0.3373 | LB: 1.1600 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.455 | LR: 8.07e-05 +[2026-04-25 22:14:10] Epoch 1 | Step 510 | Loss: 0.3293 | LM: 0.3345 | LB: 1.1598 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.478/SR1: 0.455 | LR: 8.19e-05 +[2026-04-25 22:14:17] Epoch 1 | Step 520 | Loss: 0.3292 | LM: 0.3348 | LB: 1.1594 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 8.31e-05 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/requirements.txt b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f040f697230340f8a88a6e7387f7e8983d11b547 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/requirements.txt @@ -0,0 +1,245 @@ +setuptools==78.1.1 +wheel==0.45.1 +pip==25.2 +webencodings==0.5.1 +triton==3.2.0 +pytz==2025.2 +pydub==0.25.1 +pure_eval==0.2.3 +ptyprocess==0.7.0 +nvidia-ml-py==13.590.48 +nvidia-cusparselt-cu12==0.6.2 +mpmath==1.3.0 +ipython-genutils==0.2.0 +fastjsonschema==2.21.2 +brotli==1.2.0 +antlr4-python3-runtime==4.9.3 +xxhash==3.6.0 +widgetsnbextension==4.0.14 +websocket-client==1.9.0 +webcolors==24.11.1 +wcwidth==0.2.14 +urllib3==2.5.0 +uri-template==1.3.0 +tzdata==2025.2 +typing_extensions==4.15.0 +types-python-dateutil==2.9.0.20251008 +traitlets==5.14.3 +tqdm==4.67.1 +tornado==6.5.2 +tomlkit==0.13.3 +tinycss2==1.4.0 +tabulate==0.9.0 +sympy==1.13.1 +soupsieve==2.8 +sniffio==1.3.1 +smmap==5.0.2 +six==1.17.0 +shellingham==1.5.4 +Send2Trash==1.8.3 +semantic-version==2.10.0 +safetensors==0.6.2 +rpds-py==0.27.1 +rfc3986-validator==0.1.1 +regex==2025.9.18 +pyzmq==27.1.0 +PyYAML==6.0.3 +python-multipart==0.0.22 +python-json-logger==4.0.0 +python-dotenv==1.2.1 +pyparsing==3.2.5 +PyJWT==2.8.0 +Pygments==2.19.2 +pycparser==2.23 +pyarrow==22.0.0 +psutil==7.1.0 +protobuf==6.33.4 +propcache==0.4.1 +prometheus_client==0.23.1 +portalocker==3.2.0 +platformdirs==4.5.0 +pillow==11.3.0 +pexpect==4.9.0 +pathspec==1.0.4 +parso==0.8.5 +pandocfilters==1.5.1 +packaging==25.0 +orjson==3.11.6 +opt_einsum==3.4.0 +nvidia-nvtx-cu12==12.4.127 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nccl-cu12==2.21.5 +nvidia-curand-cu12==10.3.5.147 +nvidia-cufile-cu12==1.13.1.3 +nvidia-cufft-cu12==11.2.1.3 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cublas-cu12==12.4.5.8 +numpy==2.3.3 +ninja==1.13.0 +networkx==3.5 +nest-asyncio==1.6.0 +narwhals==2.15.0 +mypy_extensions==1.1.0 +multidict==6.7.0 +mistune==3.1.4 +mdurl==0.1.2 +MarkupSafe==3.0.3 +lxml==6.0.2 +librt==0.8.0 +lark==1.3.0 +kiwisolver==1.4.9 +jupyterlab_widgets==3.0.15 +jupyterlab_pygments==0.3.0 +jsonpointer==3.0.0 +json5==0.12.1 +itsdangerous==2.2.0 +idna==3.10 +hf-xet==1.1.10 +h11==0.16.0 +groovy==0.1.2 +fsspec==2025.9.0 +frozenlist==1.8.0 +fqdn==1.5.1 +fonttools==4.60.1 +filelock==3.19.1 +ffmpy==1.0.0 +executing==2.2.1 +einops==0.8.1 +dill==0.4.0 +defusedxml==0.7.1 +decorator==5.2.1 +debugpy==1.8.17 +dacite==1.9.2 +cycler==0.12.1 +comm==0.2.3 +colorama==0.4.6 +click==8.3.1 +charset-normalizer==3.4.3 +certifi==2025.10.5 +bleach==6.2.0 +babel==2.17.0 +attrs==25.4.0 +async-lru==2.0.5 +asttokens==3.0.0 +annotated-types==0.7.0 +annotated-doc==0.0.4 +aiohappyeyeballs==2.6.1 +aiofiles==24.1.0 +yarl==1.22.0 +uvicorn==0.40.0 +typing-inspection==0.4.2 +terminado==0.18.1 +stack-data==0.6.3 +sentry-sdk==2.50.0 +scipy==1.17.0 +sacrebleu==2.6.0 +rfc3987-syntax==1.1.0 +rfc3339-validator==0.1.4 +requests==2.32.5 +reportlab==4.4.9 +referencing==0.36.2 +python-dateutil==2.9.0.post0 +pydantic_core==2.41.5 +prompt_toolkit==3.0.52 +plotly==6.5.2 +pathlib2==2.3.7.post1 +orderedmultidict==1.0.2 +optree==0.17.0 +omegaconf==2.3.0 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +mypy==1.19.1 +multiprocess==0.70.16 +matplotlib-inline==0.1.7 +markdown-it-py==4.0.0 +jupyter_core==5.8.1 +Jinja2==3.1.6 +jedi==0.19.2 +ipython_pygments_lexers==1.1.1 +httpcore==1.0.9 +gitdb==4.0.12 +ftfy==6.3.1 +contourpy==1.3.3 +cffi==2.0.0 +beautifulsoup4==4.14.2 +anyio==4.11.0 +aiosignal==1.4.0 +starlette==0.50.0 +rich==14.2.0 +pydantic==2.12.5 +pandas==2.3.3 +nvidia-cusolver-cu12==11.6.1.9 +matplotlib==3.10.7 +jupyter_server_terminals==0.5.3 +jupyter_client==8.6.3 +jsonschema-specifications==2025.9.1 +ipython==9.6.0 +hydra-core==1.3.2 +huggingface-hub==0.35.3 +httpx==0.28.1 +GitPython==3.1.46 +furl==2.1.4 +cryptography==46.0.4 +arrow==1.3.0 +argon2-cffi-bindings==25.1.0 +aiohttp==3.13.1 +wandb==0.24.0 +typer==0.21.1 +torch==2.6.0 +tokenizers==0.22.1 +seaborn==0.13.2 +safehttpx==0.1.7 +jsonschema==4.25.1 +joypy==0.2.6 +isoduration==20.11.0 +ipywidgets==8.1.7 +ipykernel==6.30.1 +gradio_client==2.0.3 +fastapi==0.128.0 +Authlib==1.6.6 +argon2-cffi==25.1.0 +transformers==4.57.6 +nbformat==5.10.4 +mlstm_kernels==2.0.2 +jupyter-console==6.6.3 +gradio==6.5.1 +datasets==4.3.0 +clearml==1.16.4 +accelerate==1.10.1 +xlstm==2.0.4 +nbclient==0.10.2 +jupyter-events==0.12.0 +trackio==0.15.0 +nbconvert==7.16.6 +jupyter_server==2.17.0 +notebook_shim==0.2.4 +jupyterlab_server==2.27.3 +jupyter-lsp==2.3.0 +nbclassic==1.3.3 +jupyterlab==4.4.9 +notebook==7.4.7 +jupyter_contrib_core==0.4.2 +jupyter==1.1.1 +jupyter_nbextensions_configurator==0.6.4 +causal-conv1d==1.5.0.post8 +flash_attn==2.7.4.post1 +mamba-ssm==2.2.4 +hnet==0.0.1 +autocommand==2.2.2 +backports.tarfile==1.2.0 +importlib_metadata==8.0.0 +inflect==7.3.1 +jaraco.collections==5.1.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +more-itertools==10.3.0 +packaging==24.2 +platformdirs==4.2.2 +tomli==2.0.1 +typeguard==4.3.0 +typing_extensions==4.12.2 +wheel==0.45.1 +zipp==3.19.2 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/wandb-metadata.json b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..646e3ae4a93fd9e43733bb90a377bfb8e4b1a975 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/files/wandb-metadata.json @@ -0,0 +1,69 @@ +{ + "os": "Linux-5.4.0-176-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.0", + "startedAt": "2026-04-25T22:06:53.291858Z", + "args": [ + "tracking=wandb", + "tracking.project=code-completion_lr-sweep", + "tracking.run_name=hnet_xl_code_lr_2e-4", + "training.lr=2e-4", + "paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4", + "data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full" + ], + "program": "/workspace/byte-llms-code/code_completion_exp/train_hnet/train.py", + "codePath": "code_completion_exp/train_hnet/train.py", + "codePathLocal": "train.py", + "git": { + "remote": "https://github.com/naryst/byte-llms-code.git", + "commit": "0a7180b6ab9f63d2794494f09ec4918576d10fa2" + }, + "email": "nikita@local.ru", + "root": "/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4", + "host": "7504e518d24a", + "executable": "/venv/bytellm/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA H100 80GB HBM3", + "gpu_count": 4, + "disk": { + "/": { + "total": "265214230528", + "used": "121383002112" + } + }, + "memory": { + "total": "1081679683584" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-b60cdcab-2033-2009-41de-be646c953a20" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-9982b420-4520-4238-c378-ec5a46015474" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134" + } + ], + "cudaVersion": "12.2", + "writerId": "pfqfn7olxjo5871ytqpsm7un6nj33lqi" +} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-core.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..a0cd67dae0a6eabc955935b1e68f3788b8b76923 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-core.log @@ -0,0 +1,7 @@ +{"time":"2026-04-25T22:06:53.377874817Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp1gnxdtqe/port-191277.txt","pid":191277,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-04-25T22:06:53.378924529Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":191277} +{"time":"2026-04-25T22:06:53.378905634Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-191277-191337-3381656369/socket","Net":"unix"}} +{"time":"2026-04-25T22:06:53.566394193Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-04-25T22:06:53.589483419Z","level":"INFO","msg":"handleInformInit: received","streamId":"ln6tfunh","id":"1(@)"} +{"time":"2026-04-25T22:06:53.95731224Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"ln6tfunh","id":"1(@)"} +{"time":"2026-04-25T22:14:21.261832239Z","level":"INFO","msg":"server: parent process exited, terminating service process"} diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-internal.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..3b412b20ee03a8239e7a8be48367686fe155a40a --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-internal.log @@ -0,0 +1,7 @@ +{"time":"2026-04-25T22:06:53.589583491Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T22:06:53.957135992Z","level":"INFO","msg":"stream: created new stream","id":"ln6tfunh"} +{"time":"2026-04-25T22:06:53.957196965Z","level":"INFO","msg":"handler: started","stream_id":"ln6tfunh"} +{"time":"2026-04-25T22:06:53.957305912Z","level":"INFO","msg":"stream: started","id":"ln6tfunh"} +{"time":"2026-04-25T22:06:53.95731694Z","level":"INFO","msg":"writer: started","stream_id":"ln6tfunh"} +{"time":"2026-04-25T22:06:53.957316732Z","level":"INFO","msg":"sender: started","stream_id":"ln6tfunh"} +{"time":"2026-04-25T22:06:54.080245448Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d65e8117e005cd02ecb2ccfeed2294c08bf19cc3 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug.log @@ -0,0 +1,19 @@ +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_setup.py:_flush():81] Configure stats pid to 191277 +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug.log +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/logs/debug-internal.log +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_init.py:init():844] calling init triggers +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 3, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0002, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': None}, 'logging': {'log_interval': 10, 'save_interval': 3000, 'eval_interval': 1000, 'save_every_epoch': True}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'hnet_xl_code_lr_2e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}} +2026-04-25 22:06:53,293 INFO MainThread:191277 [wandb_init.py:init():892] starting backend +2026-04-25 22:06:53,566 INFO MainThread:191277 [wandb_init.py:init():895] sending inform_init request +2026-04-25 22:06:53,588 INFO MainThread:191277 [wandb_init.py:init():903] backend started and connected +2026-04-25 22:06:53,591 INFO MainThread:191277 [wandb_init.py:init():973] updated telemetry +2026-04-25 22:06:53,608 INFO MainThread:191277 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 22:06:54,079 INFO MainThread:191277 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 22:06:54,239 INFO MainThread:191277 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 22:06:54,239 INFO MainThread:191277 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 22:06:54,239 INFO MainThread:191277 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 22:06:54,239 INFO MainThread:191277 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 22:06:54,242 INFO MainThread:191277 [wandb_init.py:init():1084] run started, returning control to user process diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/run-ln6tfunh.wandb b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/run-ln6tfunh.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b5c773014a71b7ab7fe8d931abffb96153a2cbbd --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_220653-ln6tfunh/run-ln6tfunh.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72032881e7c3bb7ff50124d376cbd2659ebaa1b64f4823985307f64980f864a +size 163840 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/code/code_completion_exp/train_hnet/train.py b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/code/code_completion_exp/train_hnet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7c306fe6e62d718f1815d106471a779b413a20 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/code/code_completion_exp/train_hnet/train.py @@ -0,0 +1,284 @@ +""" +Training Pipeline для HNet модели на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=hnet_small + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +from pathlib import Path + +import torch +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# HNet imports +from hnet.load_utils import load_from_pretrained, load_from_config +from hnet.utils.tokenizers import ByteTokenizer +from hnet.utils.train import group_params + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, finish_tracking +from training_lib.hnet.train_loop import train_epoch +from training_lib.hnet.data import create_dataloaders + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Глав��ая функция тренировки с поддержкой DDP чере�� Accelerate.""" + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message( + f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", + cfg, + accelerator, + ) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message( + "HNet Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator + ) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = ByteTokenizer() + + # === Model === + log_message("Loading model...", cfg, accelerator) + if cfg.model.checkpoint_path: + model = load_from_pretrained( + model_path=cfg.model.checkpoint_path, + model_config_path=cfg.model.config_path, + ) + log_message(f"Loaded pretrained: {cfg.model.checkpoint_path}", cfg, accelerator) + else: + model = load_from_config( + model_config_path=cfg.model.config_path, + device="cpu", + ) + model.init_weights() + log_message("Initialized from scratch", cfg, accelerator) + + model.train() + + # LR multiplier для разны�� стадий (до prepare!) + lr_multiplier = list(cfg.training.lr_multiplier) + model.apply_lr_multiplier(lr_multiplier) + log_message(f"Applied LR multipliers: {lr_multiplier}", cfg, accelerator) + + # Warmup для Triton kernels + if cfg.training.warmup_model: + log_message("Warming up model...", cfg, accelerator) + model = model.to(accelerator.device) + model.warmup(verbose=accelerator.is_main_process) + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message( + f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator + ) + log_message( + f"Train batches per epoch (before DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + if val_dataloader: + log_message( + f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator + ) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model) + + for group in param_groups: + if "lr" not in group: + group["lr"] = cfg.training.lr + else: + group["lr"] = cfg.training.lr * group.get("lr_multiplier", 1.0) + if "weight_decay" not in group: + group["weight_decay"] = cfg.training.weight_decay + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil(len(train_dataloader) / accelerator.num_processes) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator, + ) + + # === Accelerate Prepare === + log_message( + "Preparing model, optimizer, and dataloaders with Accelerate...", + cfg, + accelerator, + ) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = ( + accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message( + f"Train batches per epoch (after DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, + optimizer, + scheduler, + cfg.training.resume_checkpoint, + cfg, + accelerator, + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + accelerator.end_training() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/config.yaml b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0e34e724df129a7b99ff1e0ba2c20a8dbe4a03a3 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/config.yaml @@ -0,0 +1,167 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + code_path: code/code_completion_exp/train_hnet/train.py + python_version: 3.12.0 + cli_version: 0.24.0 + framework: huggingface + huggingface_version: 4.57.6 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1777155612 + t: + 1: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 3: + - 2 + - 13 + - 16 + - 37 + - 42 + - 61 + 4: 3.12.0 + 5: 0.24.0 + 6: 4.57.6 + 13: linux-x86_64 + e: + i2jx9zm2jjq81elpzo2fmxkizkbg0bw5: + os: Linux-5.4.0-176-generic-x86_64-with-glibc2.35 + python: CPython 3.12.0 + started_at: '2026-04-25T22:20:11.717689Z' + args: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=hnet_xl_code_lr_2e-4 + - training.lr=2e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + program: /workspace/byte-llms-code/code_completion_exp/train_hnet/train.py + code_path: code_completion_exp/train_hnet/train.py + code_path_local: train.py + git: + remote_url: https://github.com/naryst/byte-llms-code.git + commit: 0a7180b6ab9f63d2794494f09ec4918576d10fa2 + email: nikita@local.ru + root: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 + host: 7504e518d24a + executable: /venv/bytellm/bin/python + cpu_count: 64 + cpu_count_logical: 128 + gpu_type: NVIDIA H100 80GB HBM3 + gpu_count: 4 + disk: + /: + total: '265214230528' + used: '121389543424' + memory: + total: '1081679683584' + gpu_nvidia: + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-b60cdcab-2033-2009-41de-be646c953a20 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9982b420-4520-4238-c378-ec5a46015474 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134 + cuda_version: '12.2' + writer_id: i2jx9zm2jjq81elpzo2fmxkizkbg0bw5 +model: + desc: null + value: + config_path: /workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json + checkpoint_path: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +training: + desc: null + value: + epochs: 1 + batch_size: 4 + eval_batch_size: 24 + gradient_accumulation_steps: 4 + lr: 0.0002 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + lr_multiplier: + - 2.0 + - 1.5 + - 1.0 + load_balancing_weight: 0.01 + load_balancing_N: 4.0 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null + warmup_model: true +data: + desc: null + value: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 0 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + desc: null + value: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + desc: null + value: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: hnet_xl_code_lr_2e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 +paths: + desc: null + value: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4 +seed: + desc: null + value: 42 +device: + desc: null + value: cuda diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/output.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..a2da97f940db1a6929af74494c4f36deba69b1c1 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/output.log @@ -0,0 +1,1045 @@ +[2026-04-25 22:20:12] Initializing tokenizer... +[2026-04-25 22:20:12] Loading model... +[2026-04-25 22:20:17] Loaded pretrained: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +[2026-04-25 22:20:17] Applied LR multipliers: [2.0, 1.5, 1.0] +[2026-04-25 22:20:17] Warming up model... +[WARMUP] Starting warmup (compiling Triton kernels)... +[WARMUP] Forward: 17.256s, Backward: 26.423s +[WARMUP] Warmup complete. Subsequent passes will be fast. +[2026-04-25 22:21:00] Total params: 1,654,090,112 +[2026-04-25 22:21:00] Trainable params: 1,654,090,112 +[2026-04-25 22:21:00] Creating dataloaders... +[2026-04-25 22:21:00] Train dataset size: 316397 +[2026-04-25 22:21:00] Train batches per epoch (before DDP split): 79100 +[2026-04-25 22:21:00] Validation dataset size: 2000 +[2026-04-25 22:21:00] Validation batches: 84 +[2026-04-25 22:21:00] Creating optimizer... +[2026-04-25 22:21:00] Total steps: 9887, Steps per epoch: 39550 +[2026-04-25 22:21:00] Preparing model, optimizer, and dataloaders with Accelerate... +[2026-04-25 22:21:01] Train batches per epoch (after DDP split): 39550 +[2026-04-25 22:21:01] Starting training... +[2026-04-25 22:21:01] +============================================================ +[2026-04-25 22:21:01] EPOCH 1/1 +[2026-04-25 22:21:01] ============================================================ +[2026-04-25 22:21:30] Epoch 1 | Step 10 | Loss: 0.6125 | LM: 0.5844 | LB: 1.1575 | CL0: 2.9 | CL1: 2.1 | HR0: 0.351/SR0: 0.351 | HR1: 0.475/SR1: 0.455 | LR: 2.36e-05 +[2026-04-25 22:21:37] Epoch 1 | Step 20 | Loss: 0.5778 | LM: 0.5696 | LB: 1.1556 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.475/SR1: 0.455 | LR: 2.73e-05 +[2026-04-25 22:21:44] Epoch 1 | Step 30 | Loss: 0.5290 | LM: 0.5110 | LB: 1.1533 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.474/SR1: 0.453 | LR: 3.09e-05 +[2026-04-25 22:21:52] Epoch 1 | Step 40 | Loss: 0.4988 | LM: 0.4797 | LB: 1.1620 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.479/SR1: 0.458 | LR: 3.46e-05 +[2026-04-25 22:21:59] Epoch 1 | Step 50 | Loss: 0.4643 | LM: 0.4370 | LB: 1.1616 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.355 | HR1: 0.478/SR1: 0.457 | LR: 3.82e-05 +[2026-04-25 22:22:06] Epoch 1 | Step 60 | Loss: 0.4398 | LM: 0.4133 | LB: 1.1642 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.481/SR1: 0.460 | LR: 4.19e-05 +[2026-04-25 22:22:13] Epoch 1 | Step 70 | Loss: 0.4238 | LM: 0.4030 | LB: 1.1640 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.459 | LR: 4.55e-05 +[2026-04-25 22:22:20] Epoch 1 | Step 80 | Loss: 0.4154 | LM: 0.4031 | LB: 1.1652 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 4.91e-05 +[2026-04-25 22:22:27] Epoch 1 | Step 90 | Loss: 0.4093 | LM: 0.3959 | LB: 1.1656 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 5.28e-05 +[2026-04-25 22:22:34] Epoch 1 | Step 100 | Loss: 0.4012 | LM: 0.3985 | LB: 1.1650 | CL0: 2.8 | CL1: 2.1 | HR0: 0.360/SR0: 0.357 | HR1: 0.480/SR1: 0.459 | LR: 5.64e-05 +[2026-04-25 22:22:42] Epoch 1 | Step 110 | Loss: 0.3987 | LM: 0.4029 | LB: 1.1656 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.356 | HR1: 0.481/SR1: 0.460 | LR: 6.01e-05 +[2026-04-25 22:22:49] Epoch 1 | Step 120 | Loss: 0.3947 | LM: 0.3914 | LB: 1.1647 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.480/SR1: 0.459 | LR: 6.37e-05 +[2026-04-25 22:22:56] Epoch 1 | Step 130 | Loss: 0.3906 | LM: 0.3865 | LB: 1.1644 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 6.74e-05 +[2026-04-25 22:23:03] Epoch 1 | Step 140 | Loss: 0.3867 | LM: 0.3871 | LB: 1.1649 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 7.10e-05 +[2026-04-25 22:23:10] Epoch 1 | Step 150 | Loss: 0.3800 | LM: 0.3742 | LB: 1.1643 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.480/SR1: 0.459 | LR: 7.47e-05 +[2026-04-25 22:23:17] Epoch 1 | Step 160 | Loss: 0.3749 | LM: 0.3689 | LB: 1.1649 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.480/SR1: 0.459 | LR: 7.83e-05 +[2026-04-25 22:23:24] Epoch 1 | Step 170 | Loss: 0.3743 | LM: 0.3745 | LB: 1.1625 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 8.19e-05 +[2026-04-25 22:23:32] Epoch 1 | Step 180 | Loss: 0.3690 | LM: 0.3694 | LB: 1.1616 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.478/SR1: 0.457 | LR: 8.56e-05 +[2026-04-25 22:23:39] Epoch 1 | Step 190 | Loss: 0.3674 | LM: 0.3671 | LB: 1.1604 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 8.92e-05 +[2026-04-25 22:23:46] Epoch 1 | Step 200 | Loss: 0.3651 | LM: 0.3698 | LB: 1.1596 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.477/SR1: 0.455 | LR: 9.29e-05 +[2026-04-25 22:23:53] Epoch 1 | Step 210 | Loss: 0.3646 | LM: 0.3719 | LB: 1.1595 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.477/SR1: 0.456 | LR: 9.65e-05 +[2026-04-25 22:24:00] Epoch 1 | Step 220 | Loss: 0.3621 | LM: 0.3720 | LB: 1.1600 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.477/SR1: 0.456 | LR: 1.00e-04 +[2026-04-25 22:24:07] Epoch 1 | Step 230 | Loss: 0.3583 | LM: 0.3651 | LB: 1.1602 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.478/SR1: 0.456 | LR: 1.04e-04 +[2026-04-25 22:24:14] Epoch 1 | Step 240 | Loss: 0.3563 | LM: 0.3605 | LB: 1.1597 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.478/SR1: 0.456 | LR: 1.07e-04 +[2026-04-25 22:24:22] Epoch 1 | Step 250 | Loss: 0.3541 | LM: 0.3553 | LB: 1.1597 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.478/SR1: 0.456 | LR: 1.11e-04 +[2026-04-25 22:24:29] Epoch 1 | Step 260 | Loss: 0.3534 | LM: 0.3572 | LB: 1.1596 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.478/SR1: 0.456 | LR: 1.15e-04 +[2026-04-25 22:24:36] Epoch 1 | Step 270 | Loss: 0.3511 | LM: 0.3574 | LB: 1.1587 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.477/SR1: 0.455 | LR: 1.18e-04 +[2026-04-25 22:24:43] Epoch 1 | Step 280 | Loss: 0.3485 | LM: 0.3590 | LB: 1.1588 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.354 | HR1: 0.477/SR1: 0.455 | LR: 1.22e-04 +[2026-04-25 22:24:50] Epoch 1 | Step 290 | Loss: 0.3466 | LM: 0.3579 | LB: 1.1586 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.477/SR1: 0.455 | LR: 1.26e-04 +[2026-04-25 22:24:57] Epoch 1 | Step 300 | Loss: 0.3458 | LM: 0.3573 | LB: 1.1589 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.477/SR1: 0.454 | LR: 1.29e-04 +[2026-04-25 22:25:04] Epoch 1 | Step 310 | Loss: 0.3446 | LM: 0.3553 | LB: 1.1582 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.476/SR1: 0.454 | LR: 1.33e-04 +[2026-04-25 22:25:12] Epoch 1 | Step 320 | Loss: 0.3422 | LM: 0.3528 | LB: 1.1581 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.476/SR1: 0.454 | LR: 1.37e-04 +[2026-04-25 22:25:19] Epoch 1 | Step 330 | Loss: 0.3406 | LM: 0.3517 | LB: 1.1579 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.476/SR1: 0.453 | LR: 1.40e-04 +[2026-04-25 22:25:26] Epoch 1 | Step 340 | Loss: 0.3388 | LM: 0.3499 | LB: 1.1577 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.475/SR1: 0.453 | LR: 1.44e-04 +[2026-04-25 22:25:33] Epoch 1 | Step 350 | Loss: 0.3380 | LM: 0.3491 | LB: 1.1574 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.475/SR1: 0.453 | LR: 1.48e-04 +[2026-04-25 22:25:40] Epoch 1 | Step 360 | Loss: 0.3359 | LM: 0.3461 | LB: 1.1569 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.475/SR1: 0.453 | LR: 1.51e-04 +[2026-04-25 22:25:47] Epoch 1 | Step 370 | Loss: 0.3338 | LM: 0.3445 | LB: 1.1562 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.474/SR1: 0.452 | LR: 1.55e-04 +[2026-04-25 22:25:54] Epoch 1 | Step 380 | Loss: 0.3320 | LM: 0.3435 | LB: 1.1558 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.474/SR1: 0.452 | LR: 1.58e-04 +[2026-04-25 22:26:02] Epoch 1 | Step 390 | Loss: 0.3313 | LM: 0.3421 | LB: 1.1558 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.474/SR1: 0.452 | LR: 1.62e-04 +[2026-04-25 22:26:09] Epoch 1 | Step 400 | Loss: 0.3308 | LM: 0.3418 | LB: 1.1549 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.473/SR1: 0.451 | LR: 1.66e-04 +[2026-04-25 22:26:16] Epoch 1 | Step 410 | Loss: 0.3299 | LM: 0.3416 | LB: 1.1543 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.473/SR1: 0.450 | LR: 1.69e-04 +[2026-04-25 22:26:23] Epoch 1 | Step 420 | Loss: 0.3287 | LM: 0.3408 | LB: 1.1542 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.472/SR1: 0.450 | LR: 1.73e-04 +[2026-04-25 22:26:30] Epoch 1 | Step 430 | Loss: 0.3287 | LM: 0.3391 | LB: 1.1537 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.472/SR1: 0.450 | LR: 1.77e-04 +[2026-04-25 22:26:37] Epoch 1 | Step 440 | Loss: 0.3269 | LM: 0.3376 | LB: 1.1532 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.472/SR1: 0.450 | LR: 1.80e-04 +[2026-04-25 22:26:44] Epoch 1 | Step 450 | Loss: 0.3264 | LM: 0.3371 | LB: 1.1526 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.471/SR1: 0.449 | LR: 1.84e-04 +[2026-04-25 22:26:52] Epoch 1 | Step 460 | Loss: 0.3260 | LM: 0.3358 | LB: 1.1524 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.471/SR1: 0.449 | LR: 1.88e-04 +[2026-04-25 22:26:59] Epoch 1 | Step 470 | Loss: 0.3249 | LM: 0.3345 | LB: 1.1519 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.470/SR1: 0.448 | LR: 1.91e-04 +[2026-04-25 22:27:06] Epoch 1 | Step 480 | Loss: 0.3241 | LM: 0.3329 | LB: 1.1518 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.470/SR1: 0.448 | LR: 1.95e-04 +[2026-04-25 22:27:13] Epoch 1 | Step 490 | Loss: 0.3233 | LM: 0.3315 | LB: 1.1514 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.470/SR1: 0.448 | LR: 1.99e-04 +[2026-04-25 22:27:20] Epoch 1 | Step 500 | Loss: 0.3227 | LM: 0.3292 | LB: 1.1509 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.469/SR1: 0.447 | LR: 2.00e-04 +[2026-04-25 22:27:27] Epoch 1 | Step 510 | Loss: 0.3216 | LM: 0.3266 | LB: 1.1503 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.469/SR1: 0.447 | LR: 2.00e-04 +[2026-04-25 22:27:35] Epoch 1 | Step 520 | Loss: 0.3217 | LM: 0.3270 | LB: 1.1496 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.468/SR1: 0.446 | LR: 2.00e-04 +[2026-04-25 22:27:42] Epoch 1 | Step 530 | Loss: 0.3213 | LM: 0.3269 | LB: 1.1489 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.446 | LR: 2.00e-04 +[2026-04-25 22:27:49] Epoch 1 | Step 540 | Loss: 0.3204 | LM: 0.3265 | LB: 1.1482 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.467/SR1: 0.445 | LR: 2.00e-04 +[2026-04-25 22:27:56] Epoch 1 | Step 550 | Loss: 0.3195 | LM: 0.3255 | LB: 1.1476 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.445 | LR: 2.00e-04 +[2026-04-25 22:28:03] Epoch 1 | Step 560 | Loss: 0.3191 | LM: 0.3236 | LB: 1.1469 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.466/SR1: 0.444 | LR: 2.00e-04 +[2026-04-25 22:28:10] Epoch 1 | Step 570 | Loss: 0.3188 | LM: 0.3233 | LB: 1.1466 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.444 | LR: 2.00e-04 +[2026-04-25 22:28:17] Epoch 1 | Step 580 | Loss: 0.3184 | LM: 0.3228 | LB: 1.1460 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.465/SR1: 0.443 | LR: 2.00e-04 +[2026-04-25 22:28:24] Epoch 1 | Step 590 | Loss: 0.3183 | LM: 0.3226 | LB: 1.1454 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.443 | LR: 2.00e-04 +[2026-04-25 22:28:31] Epoch 1 | Step 600 | Loss: 0.3177 | LM: 0.3210 | LB: 1.1448 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.464/SR1: 0.442 | LR: 2.00e-04 +[2026-04-25 22:28:38] Epoch 1 | Step 610 | Loss: 0.3180 | LM: 0.3204 | LB: 1.1444 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.442 | LR: 2.00e-04 +[2026-04-25 22:28:46] Epoch 1 | Step 620 | Loss: 0.3181 | LM: 0.3202 | LB: 1.1442 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.442 | LR: 2.00e-04 +[2026-04-25 22:28:53] Epoch 1 | Step 630 | Loss: 0.3178 | LM: 0.3210 | LB: 1.1438 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.441 | LR: 2.00e-04 +[2026-04-25 22:29:00] Epoch 1 | Step 640 | Loss: 0.3179 | LM: 0.3200 | LB: 1.1435 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.463/SR1: 0.441 | LR: 2.00e-04 +[2026-04-25 22:29:07] Epoch 1 | Step 650 | Loss: 0.3175 | LM: 0.3195 | LB: 1.1430 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.440 | LR: 2.00e-04 +[2026-04-25 22:29:14] Epoch 1 | Step 660 | Loss: 0.3178 | LM: 0.3189 | LB: 1.1426 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.440 | LR: 2.00e-04 +[2026-04-25 22:29:21] Epoch 1 | Step 670 | Loss: 0.3173 | LM: 0.3183 | LB: 1.1422 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.440 | LR: 2.00e-04 +[2026-04-25 22:29:28] Epoch 1 | Step 680 | Loss: 0.3172 | LM: 0.3186 | LB: 1.1421 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.439 | LR: 2.00e-04 +[2026-04-25 22:29:35] Epoch 1 | Step 690 | Loss: 0.3170 | LM: 0.3192 | LB: 1.1415 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.461/SR1: 0.439 | LR: 2.00e-04 +[2026-04-25 22:29:42] Epoch 1 | Step 700 | Loss: 0.3173 | LM: 0.3210 | LB: 1.1410 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.438 | LR: 2.00e-04 +[2026-04-25 22:29:50] Epoch 1 | Step 710 | Loss: 0.3168 | LM: 0.3213 | LB: 1.1406 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.460/SR1: 0.438 | LR: 2.00e-04 +[2026-04-25 22:29:57] Epoch 1 | Step 720 | Loss: 0.3167 | LM: 0.3215 | LB: 1.1400 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.437 | LR: 2.00e-04 +[2026-04-25 22:30:04] Epoch 1 | Step 730 | Loss: 0.3167 | LM: 0.3212 | LB: 1.1397 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.437 | LR: 2.00e-04 +[2026-04-25 22:30:11] Epoch 1 | Step 740 | Loss: 0.3164 | LM: 0.3209 | LB: 1.1392 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.437 | LR: 2.00e-04 +[2026-04-25 22:30:18] Epoch 1 | Step 750 | Loss: 0.3160 | LM: 0.3202 | LB: 1.1385 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.458/SR1: 0.436 | LR: 2.00e-04 +[2026-04-25 22:30:25] Epoch 1 | Step 760 | Loss: 0.3164 | LM: 0.3198 | LB: 1.1381 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.457/SR1: 0.436 | LR: 2.00e-04 +[2026-04-25 22:30:32] Epoch 1 | Step 770 | Loss: 0.3169 | LM: 0.3199 | LB: 1.1377 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.457/SR1: 0.435 | LR: 2.00e-04 +[2026-04-25 22:30:40] Epoch 1 | Step 780 | Loss: 0.3166 | LM: 0.3192 | LB: 1.1372 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.457/SR1: 0.435 | LR: 2.00e-04 +[2026-04-25 22:30:47] Epoch 1 | Step 790 | Loss: 0.3163 | LM: 0.3191 | LB: 1.1368 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.456/SR1: 0.435 | LR: 2.00e-04 +[2026-04-25 22:30:54] Epoch 1 | Step 800 | Loss: 0.3157 | LM: 0.3178 | LB: 1.1363 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.456/SR1: 0.434 | LR: 2.00e-04 +[2026-04-25 22:31:01] Epoch 1 | Step 810 | Loss: 0.3155 | LM: 0.3182 | LB: 1.1360 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.456/SR1: 0.434 | LR: 2.00e-04 +[2026-04-25 22:31:08] Epoch 1 | Step 820 | Loss: 0.3153 | LM: 0.3178 | LB: 1.1357 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.455/SR1: 0.434 | LR: 2.00e-04 +[2026-04-25 22:31:15] Epoch 1 | Step 830 | Loss: 0.3156 | LM: 0.3184 | LB: 1.1353 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.455/SR1: 0.433 | LR: 2.00e-04 +[2026-04-25 22:31:22] Epoch 1 | Step 840 | Loss: 0.3155 | LM: 0.3175 | LB: 1.1351 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.455/SR1: 0.433 | LR: 2.00e-04 +[2026-04-25 22:31:29] Epoch 1 | Step 850 | Loss: 0.3150 | LM: 0.3172 | LB: 1.1348 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.454/SR1: 0.433 | LR: 2.00e-04 +[2026-04-25 22:31:37] Epoch 1 | Step 860 | Loss: 0.3150 | LM: 0.3163 | LB: 1.1345 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.454/SR1: 0.433 | LR: 2.00e-04 +[2026-04-25 22:31:44] Epoch 1 | Step 870 | Loss: 0.3153 | LM: 0.3162 | LB: 1.1342 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.454/SR1: 0.432 | LR: 2.00e-04 +[2026-04-25 22:31:51] Epoch 1 | Step 880 | Loss: 0.3154 | LM: 0.3165 | LB: 1.1339 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.453/SR1: 0.432 | LR: 2.00e-04 +[2026-04-25 22:31:58] Epoch 1 | Step 890 | Loss: 0.3153 | LM: 0.3161 | LB: 1.1336 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.453/SR1: 0.432 | LR: 2.00e-04 +[2026-04-25 22:32:05] Epoch 1 | Step 900 | Loss: 0.3154 | LM: 0.3158 | LB: 1.1336 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.453/SR1: 0.431 | LR: 2.00e-04 +[2026-04-25 22:32:12] Epoch 1 | Step 910 | Loss: 0.3156 | LM: 0.3155 | LB: 1.1332 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.453/SR1: 0.431 | LR: 2.00e-04 +[2026-04-25 22:32:20] Epoch 1 | Step 920 | Loss: 0.3161 | LM: 0.3157 | LB: 1.1330 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.452/SR1: 0.431 | LR: 2.00e-04 +[2026-04-25 22:32:27] Epoch 1 | Step 930 | Loss: 0.3160 | LM: 0.3152 | LB: 1.1327 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.452/SR1: 0.431 | LR: 2.00e-04 +[2026-04-25 22:32:34] Epoch 1 | Step 940 | Loss: 0.3162 | LM: 0.3149 | LB: 1.1324 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.452/SR1: 0.430 | LR: 2.00e-04 +[2026-04-25 22:32:41] Epoch 1 | Step 950 | Loss: 0.3157 | LM: 0.3148 | LB: 1.1322 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.452/SR1: 0.430 | LR: 2.00e-04 +[2026-04-25 22:32:48] Epoch 1 | Step 960 | Loss: 0.3153 | LM: 0.3153 | LB: 1.1320 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.451/SR1: 0.430 | LR: 2.00e-04 +[2026-04-25 22:32:55] Epoch 1 | Step 970 | Loss: 0.3153 | LM: 0.3158 | LB: 1.1316 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.451/SR1: 0.430 | LR: 2.00e-04 +[2026-04-25 22:33:02] Epoch 1 | Step 980 | Loss: 0.3149 | LM: 0.3146 | LB: 1.1314 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.451/SR1: 0.429 | LR: 2.00e-04 +[2026-04-25 22:33:09] Epoch 1 | Step 990 | Loss: 0.3145 | LM: 0.3146 | LB: 1.1312 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.450/SR1: 0.429 | LR: 2.00e-04 +[2026-04-25 22:33:16] Epoch 1 | Step 1000 | Loss: 0.3143 | LM: 0.3139 | LB: 1.1308 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.450/SR1: 0.429 | LR: 2.00e-04 +[2026-04-25 22:33:24] Epoch 1 | Step 1010 | Loss: 0.3145 | LM: 0.3143 | LB: 1.1305 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.450/SR1: 0.428 | LR: 2.00e-04 +[2026-04-25 22:33:31] Epoch 1 | Step 1020 | Loss: 0.3145 | LM: 0.3145 | LB: 1.1301 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.449/SR1: 0.428 | LR: 2.00e-04 +[2026-04-25 22:33:38] Epoch 1 | Step 1030 | Loss: 0.3147 | LM: 0.3145 | LB: 1.1297 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.449/SR1: 0.428 | LR: 2.00e-04 +[2026-04-25 22:33:45] Epoch 1 | Step 1040 | Loss: 0.3142 | LM: 0.3139 | LB: 1.1294 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.354 | HR1: 0.449/SR1: 0.427 | LR: 2.00e-04 +[2026-04-25 22:33:52] Epoch 1 | Step 1050 | Loss: 0.3140 | LM: 0.3137 | LB: 1.1291 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.354 | HR1: 0.448/SR1: 0.427 | LR: 2.00e-04 +[2026-04-25 22:33:59] Epoch 1 | Step 1060 | Loss: 0.3135 | LM: 0.3131 | LB: 1.1289 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.354 | HR1: 0.448/SR1: 0.427 | LR: 2.00e-04 +[2026-04-25 22:34:06] Epoch 1 | Step 1070 | Loss: 0.3133 | LM: 0.3125 | LB: 1.1286 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.354 | HR1: 0.448/SR1: 0.426 | LR: 2.00e-04 +[2026-04-25 22:34:13] Epoch 1 | Step 1080 | Loss: 0.3136 | LM: 0.3118 | LB: 1.1285 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.448/SR1: 0.426 | LR: 2.00e-04 +[2026-04-25 22:34:21] Epoch 1 | Step 1090 | Loss: 0.3141 | LM: 0.3123 | LB: 1.1283 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.448/SR1: 0.426 | LR: 2.00e-04 +[2026-04-25 22:34:28] Epoch 1 | Step 1100 | Loss: 0.3141 | LM: 0.3126 | LB: 1.1282 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.448/SR1: 0.426 | LR: 2.00e-04 +[2026-04-25 22:34:35] Epoch 1 | Step 1110 | Loss: 0.3141 | LM: 0.3119 | LB: 1.1279 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.447/SR1: 0.426 | LR: 2.00e-04 +[2026-04-25 22:34:42] Epoch 1 | Step 1120 | Loss: 0.3144 | LM: 0.3118 | LB: 1.1277 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.447/SR1: 0.425 | LR: 2.00e-04 +[2026-04-25 22:34:49] Epoch 1 | Step 1130 | Loss: 0.3147 | LM: 0.3113 | LB: 1.1274 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.447/SR1: 0.425 | LR: 2.00e-04 +[2026-04-25 22:34:56] Epoch 1 | Step 1140 | Loss: 0.3147 | LM: 0.3111 | LB: 1.1270 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.446/SR1: 0.425 | LR: 2.00e-04 +[2026-04-25 22:35:03] Epoch 1 | Step 1150 | Loss: 0.3143 | LM: 0.3106 | LB: 1.1267 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.446/SR1: 0.424 | LR: 2.00e-04 +[2026-04-25 22:35:10] Epoch 1 | Step 1160 | Loss: 0.3145 | LM: 0.3109 | LB: 1.1264 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.446/SR1: 0.424 | LR: 2.00e-04 +[2026-04-25 22:35:17] Epoch 1 | Step 1170 | Loss: 0.3147 | LM: 0.3106 | LB: 1.1262 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.445/SR1: 0.424 | LR: 2.00e-04 +[2026-04-25 22:35:25] Epoch 1 | Step 1180 | Loss: 0.3147 | LM: 0.3109 | LB: 1.1258 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:35:32] Epoch 1 | Step 1190 | Loss: 0.3148 | LM: 0.3110 | LB: 1.1257 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:35:39] Epoch 1 | Step 1200 | Loss: 0.3146 | LM: 0.3103 | LB: 1.1255 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:35:46] Epoch 1 | Step 1210 | Loss: 0.3144 | LM: 0.3103 | LB: 1.1253 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:35:53] Epoch 1 | Step 1220 | Loss: 0.3139 | LM: 0.3095 | LB: 1.1252 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:36:00] Epoch 1 | Step 1230 | Loss: 0.3141 | LM: 0.3099 | LB: 1.1250 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.445/SR1: 0.423 | LR: 2.00e-04 +[2026-04-25 22:36:07] Epoch 1 | Step 1240 | Loss: 0.3142 | LM: 0.3100 | LB: 1.1248 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:14] Epoch 1 | Step 1250 | Loss: 0.3140 | LM: 0.3102 | LB: 1.1247 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:22] Epoch 1 | Step 1260 | Loss: 0.3137 | LM: 0.3102 | LB: 1.1245 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:29] Epoch 1 | Step 1270 | Loss: 0.3134 | LM: 0.3098 | LB: 1.1244 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:36] Epoch 1 | Step 1280 | Loss: 0.3134 | LM: 0.3095 | LB: 1.1242 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:43] Epoch 1 | Step 1290 | Loss: 0.3134 | LM: 0.3098 | LB: 1.1241 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:50] Epoch 1 | Step 1300 | Loss: 0.3131 | LM: 0.3097 | LB: 1.1241 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.422 | LR: 2.00e-04 +[2026-04-25 22:36:57] Epoch 1 | Step 1310 | Loss: 0.3130 | LM: 0.3091 | LB: 1.1238 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.444/SR1: 0.421 | LR: 2.00e-04 +[2026-04-25 22:37:04] Epoch 1 | Step 1320 | Loss: 0.3131 | LM: 0.3089 | LB: 1.1236 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.421 | LR: 2.00e-04 +[2026-04-25 22:37:12] Epoch 1 | Step 1330 | Loss: 0.3129 | LM: 0.3087 | LB: 1.1234 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.421 | LR: 2.00e-04 +[2026-04-25 22:37:19] Epoch 1 | Step 1340 | Loss: 0.3130 | LM: 0.3089 | LB: 1.1231 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.421 | LR: 2.00e-04 +[2026-04-25 22:37:26] Epoch 1 | Step 1350 | Loss: 0.3131 | LM: 0.3094 | LB: 1.1233 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.421 | LR: 2.00e-04 +[2026-04-25 22:37:33] Epoch 1 | Step 1360 | Loss: 0.3130 | LM: 0.3087 | LB: 1.1232 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.420 | LR: 2.00e-04 +[2026-04-25 22:37:40] Epoch 1 | Step 1370 | Loss: 0.3130 | LM: 0.3086 | LB: 1.1230 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.443/SR1: 0.420 | LR: 2.00e-04 +[2026-04-25 22:37:47] Epoch 1 | Step 1380 | Loss: 0.3133 | LM: 0.3084 | LB: 1.1228 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.420 | LR: 2.00e-04 +[2026-04-25 22:37:54] Epoch 1 | Step 1390 | Loss: 0.3138 | LM: 0.3088 | LB: 1.1225 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.420 | LR: 2.00e-04 +[2026-04-25 22:38:01] Epoch 1 | Step 1400 | Loss: 0.3138 | LM: 0.3085 | LB: 1.1222 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.420 | LR: 2.00e-04 +[2026-04-25 22:38:08] Epoch 1 | Step 1410 | Loss: 0.3134 | LM: 0.3076 | LB: 1.1220 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:16] Epoch 1 | Step 1420 | Loss: 0.3134 | LM: 0.3074 | LB: 1.1220 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:23] Epoch 1 | Step 1430 | Loss: 0.3133 | LM: 0.3076 | LB: 1.1219 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:30] Epoch 1 | Step 1440 | Loss: 0.3131 | LM: 0.3073 | LB: 1.1217 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.442/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:37] Epoch 1 | Step 1450 | Loss: 0.3130 | LM: 0.3069 | LB: 1.1216 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:44] Epoch 1 | Step 1460 | Loss: 0.3126 | LM: 0.3066 | LB: 1.1214 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.419 | LR: 2.00e-04 +[2026-04-25 22:38:51] Epoch 1 | Step 1470 | Loss: 0.3128 | LM: 0.3069 | LB: 1.1214 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:38:58] Epoch 1 | Step 1480 | Loss: 0.3127 | LM: 0.3065 | LB: 1.1212 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:06] Epoch 1 | Step 1490 | Loss: 0.3128 | LM: 0.3066 | LB: 1.1211 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:13] Epoch 1 | Step 1500 | Loss: 0.3127 | LM: 0.3063 | LB: 1.1210 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:20] Epoch 1 | Step 1510 | Loss: 0.3127 | LM: 0.3065 | LB: 1.1210 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:27] Epoch 1 | Step 1520 | Loss: 0.3126 | LM: 0.3063 | LB: 1.1209 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:34] Epoch 1 | Step 1530 | Loss: 0.3126 | LM: 0.3062 | LB: 1.1208 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.441/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:41] Epoch 1 | Step 1540 | Loss: 0.3127 | LM: 0.3067 | LB: 1.1208 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:48] Epoch 1 | Step 1550 | Loss: 0.3127 | LM: 0.3065 | LB: 1.1207 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:39:55] Epoch 1 | Step 1560 | Loss: 0.3126 | LM: 0.3064 | LB: 1.1206 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.418 | LR: 2.00e-04 +[2026-04-25 22:40:03] Epoch 1 | Step 1570 | Loss: 0.3128 | LM: 0.3066 | LB: 1.1204 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:10] Epoch 1 | Step 1580 | Loss: 0.3128 | LM: 0.3069 | LB: 1.1204 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:17] Epoch 1 | Step 1590 | Loss: 0.3128 | LM: 0.3076 | LB: 1.1204 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:24] Epoch 1 | Step 1600 | Loss: 0.3128 | LM: 0.3074 | LB: 1.1203 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:31] Epoch 1 | Step 1610 | Loss: 0.3125 | LM: 0.3069 | LB: 1.1203 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:38] Epoch 1 | Step 1620 | Loss: 0.3123 | LM: 0.3068 | LB: 1.1202 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:45] Epoch 1 | Step 1630 | Loss: 0.3124 | LM: 0.3068 | LB: 1.1201 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.440/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:40:53] Epoch 1 | Step 1640 | Loss: 0.3123 | LM: 0.3069 | LB: 1.1199 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:41:00] Epoch 1 | Step 1650 | Loss: 0.3121 | LM: 0.3070 | LB: 1.1199 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:41:07] Epoch 1 | Step 1660 | Loss: 0.3119 | LM: 0.3063 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:41:14] Epoch 1 | Step 1670 | Loss: 0.3121 | LM: 0.3064 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:41:21] Epoch 1 | Step 1680 | Loss: 0.3122 | LM: 0.3062 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.417 | LR: 2.00e-04 +[2026-04-25 22:41:28] Epoch 1 | Step 1690 | Loss: 0.3121 | LM: 0.3060 | LB: 1.1198 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:41:36] Epoch 1 | Step 1700 | Loss: 0.3118 | LM: 0.3055 | LB: 1.1197 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:41:43] Epoch 1 | Step 1710 | Loss: 0.3117 | LM: 0.3048 | LB: 1.1196 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:41:50] Epoch 1 | Step 1720 | Loss: 0.3117 | LM: 0.3049 | LB: 1.1194 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:41:57] Epoch 1 | Step 1730 | Loss: 0.3116 | LM: 0.3050 | LB: 1.1193 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:42:04] Epoch 1 | Step 1740 | Loss: 0.3116 | LM: 0.3053 | LB: 1.1192 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:42:11] Epoch 1 | Step 1750 | Loss: 0.3119 | LM: 0.3055 | LB: 1.1191 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:42:18] Epoch 1 | Step 1760 | Loss: 0.3118 | LM: 0.3055 | LB: 1.1189 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.439/SR1: 0.416 | LR: 2.00e-04 +[2026-04-25 22:42:25] Epoch 1 | Step 1770 | Loss: 0.3118 | LM: 0.3056 | LB: 1.1187 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:42:32] Epoch 1 | Step 1780 | Loss: 0.3119 | LM: 0.3055 | LB: 1.1186 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:42:40] Epoch 1 | Step 1790 | Loss: 0.3120 | LM: 0.3053 | LB: 1.1186 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:42:47] Epoch 1 | Step 1800 | Loss: 0.3120 | LM: 0.3053 | LB: 1.1184 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:42:54] Epoch 1 | Step 1810 | Loss: 0.3120 | LM: 0.3053 | LB: 1.1183 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:43:01] Epoch 1 | Step 1820 | Loss: 0.3121 | LM: 0.3052 | LB: 1.1182 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:43:08] Epoch 1 | Step 1830 | Loss: 0.3121 | LM: 0.3061 | LB: 1.1182 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:43:15] Epoch 1 | Step 1840 | Loss: 0.3122 | LM: 0.3063 | LB: 1.1180 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.415 | LR: 2.00e-04 +[2026-04-25 22:43:22] Epoch 1 | Step 1850 | Loss: 0.3120 | LM: 0.3061 | LB: 1.1179 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.438/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:43:29] Epoch 1 | Step 1860 | Loss: 0.3121 | LM: 0.3058 | LB: 1.1179 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:43:37] Epoch 1 | Step 1870 | Loss: 0.3120 | LM: 0.3055 | LB: 1.1178 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:43:44] Epoch 1 | Step 1880 | Loss: 0.3120 | LM: 0.3056 | LB: 1.1176 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:43:51] Epoch 1 | Step 1890 | Loss: 0.3121 | LM: 0.3055 | LB: 1.1175 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:43:58] Epoch 1 | Step 1900 | Loss: 0.3120 | LM: 0.3054 | LB: 1.1173 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:44:05] Epoch 1 | Step 1910 | Loss: 0.3121 | LM: 0.3056 | LB: 1.1172 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:44:12] Epoch 1 | Step 1920 | Loss: 0.3122 | LM: 0.3053 | LB: 1.1171 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.414 | LR: 2.00e-04 +[2026-04-25 22:44:19] Epoch 1 | Step 1930 | Loss: 0.3120 | LM: 0.3052 | LB: 1.1170 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:44:27] Epoch 1 | Step 1940 | Loss: 0.3120 | LM: 0.3053 | LB: 1.1169 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:44:34] Epoch 1 | Step 1950 | Loss: 0.3119 | LM: 0.3054 | LB: 1.1167 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:44:41] Epoch 1 | Step 1960 | Loss: 0.3120 | LM: 0.3055 | LB: 1.1166 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:44:48] Epoch 1 | Step 1970 | Loss: 0.3120 | LM: 0.3059 | LB: 1.1164 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:44:55] Epoch 1 | Step 1980 | Loss: 0.3121 | LM: 0.3059 | LB: 1.1164 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:45:02] Epoch 1 | Step 1990 | Loss: 0.3121 | LM: 0.3060 | LB: 1.1163 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.413 | LR: 2.00e-04 +[2026-04-25 22:45:09] Epoch 1 | Step 2000 | Loss: 0.3121 | LM: 0.3059 | LB: 1.1162 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:10] Validation | Batch 10/42 | Loss: 0.2991 | LM_LOSS: 0.2883 | LB_LOSS: 1.0839 +[2026-04-25 22:45:11] Validation | Batch 20/42 | Loss: 0.3200 | LM_LOSS: 0.3091 | LB_LOSS: 1.0878 +[2026-04-25 22:45:12] Validation | Batch 30/42 | Loss: 0.3119 | LM_LOSS: 0.3010 | LB_LOSS: 1.0851 +[2026-04-25 22:45:14] Validation | Batch 40/42 | Loss: 0.3169 | LM_LOSS: 0.3060 | LB_LOSS: 1.0844 +[2026-04-25 22:45:14] Validation | Batch 42/42 | Loss: 0.3165 | LM_LOSS: 0.3056 | LB_LOSS: 1.0846 +[2026-04-25 22:45:14] Validation | Loss: 0.3165 | LM_LOSS: 0.3056 | LB_LOSS: 1.0846 | PPL: 1.35 | Time: 4.71s +[2026-04-25 22:45:16] New best model saved! Val loss: 0.3165 +[2026-04-25 22:45:24] Epoch 1 | Step 2010 | Loss: 0.3122 | LM: 0.3061 | LB: 1.1161 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.436/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:31] Epoch 1 | Step 2020 | Loss: 0.3122 | LM: 0.3061 | LB: 1.1159 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:38] Epoch 1 | Step 2030 | Loss: 0.3122 | LM: 0.3059 | LB: 1.1158 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:45] Epoch 1 | Step 2040 | Loss: 0.3121 | LM: 0.3057 | LB: 1.1156 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:52] Epoch 1 | Step 2050 | Loss: 0.3121 | LM: 0.3056 | LB: 1.1154 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.412 | LR: 2.00e-04 +[2026-04-25 22:45:59] Epoch 1 | Step 2060 | Loss: 0.3121 | LM: 0.3059 | LB: 1.1154 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:06] Epoch 1 | Step 2070 | Loss: 0.3118 | LM: 0.3054 | LB: 1.1152 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.435/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:13] Epoch 1 | Step 2080 | Loss: 0.3117 | LM: 0.3053 | LB: 1.1150 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:21] Epoch 1 | Step 2090 | Loss: 0.3117 | LM: 0.3055 | LB: 1.1149 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:28] Epoch 1 | Step 2100 | Loss: 0.3117 | LM: 0.3052 | LB: 1.1147 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:35] Epoch 1 | Step 2110 | Loss: 0.3117 | LM: 0.3051 | LB: 1.1146 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.434/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:42] Epoch 1 | Step 2120 | Loss: 0.3116 | LM: 0.3054 | LB: 1.1145 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.411 | LR: 2.00e-04 +[2026-04-25 22:46:49] Epoch 1 | Step 2130 | Loss: 0.3115 | LM: 0.3056 | LB: 1.1145 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:46:56] Epoch 1 | Step 2140 | Loss: 0.3114 | LM: 0.3060 | LB: 1.1143 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:03] Epoch 1 | Step 2150 | Loss: 0.3115 | LM: 0.3060 | LB: 1.1143 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:10] Epoch 1 | Step 2160 | Loss: 0.3115 | LM: 0.3060 | LB: 1.1142 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.434/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:17] Epoch 1 | Step 2170 | Loss: 0.3115 | LM: 0.3059 | LB: 1.1141 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:24] Epoch 1 | Step 2180 | Loss: 0.3114 | LM: 0.3058 | LB: 1.1140 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:32] Epoch 1 | Step 2190 | Loss: 0.3113 | LM: 0.3056 | LB: 1.1138 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:39] Epoch 1 | Step 2200 | Loss: 0.3114 | LM: 0.3057 | LB: 1.1137 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:46] Epoch 1 | Step 2210 | Loss: 0.3114 | LM: 0.3059 | LB: 1.1136 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.410 | LR: 2.00e-04 +[2026-04-25 22:47:53] Epoch 1 | Step 2220 | Loss: 0.3116 | LM: 0.3060 | LB: 1.1136 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:00] Epoch 1 | Step 2230 | Loss: 0.3117 | LM: 0.3060 | LB: 1.1135 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:07] Epoch 1 | Step 2240 | Loss: 0.3118 | LM: 0.3057 | LB: 1.1134 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:14] Epoch 1 | Step 2250 | Loss: 0.3119 | LM: 0.3058 | LB: 1.1133 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:21] Epoch 1 | Step 2260 | Loss: 0.3118 | LM: 0.3055 | LB: 1.1132 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:28] Epoch 1 | Step 2270 | Loss: 0.3119 | LM: 0.3059 | LB: 1.1131 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.433/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:35] Epoch 1 | Step 2280 | Loss: 0.3119 | LM: 0.3058 | LB: 1.1130 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:43] Epoch 1 | Step 2290 | Loss: 0.3122 | LM: 0.3061 | LB: 1.1129 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:50] Epoch 1 | Step 2300 | Loss: 0.3121 | LM: 0.3057 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.409 | LR: 2.00e-04 +[2026-04-25 22:48:57] Epoch 1 | Step 2310 | Loss: 0.3120 | LM: 0.3057 | LB: 1.1127 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:04] Epoch 1 | Step 2320 | Loss: 0.3122 | LM: 0.3059 | LB: 1.1126 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:11] Epoch 1 | Step 2330 | Loss: 0.3122 | LM: 0.3060 | LB: 1.1126 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:18] Epoch 1 | Step 2340 | Loss: 0.3121 | LM: 0.3063 | LB: 1.1125 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:25] Epoch 1 | Step 2350 | Loss: 0.3121 | LM: 0.3064 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:32] Epoch 1 | Step 2360 | Loss: 0.3122 | LM: 0.3066 | LB: 1.1124 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:40] Epoch 1 | Step 2370 | Loss: 0.3121 | LM: 0.3064 | LB: 1.1123 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:47] Epoch 1 | Step 2380 | Loss: 0.3120 | LM: 0.3063 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:49:54] Epoch 1 | Step 2390 | Loss: 0.3122 | LM: 0.3063 | LB: 1.1122 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.432/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:50:01] Epoch 1 | Step 2400 | Loss: 0.3121 | LM: 0.3065 | LB: 1.1121 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:50:08] Epoch 1 | Step 2410 | Loss: 0.3122 | LM: 0.3068 | LB: 1.1120 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:50:15] Epoch 1 | Step 2420 | Loss: 0.3123 | LM: 0.3066 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:50:22] Epoch 1 | Step 2430 | Loss: 0.3123 | LM: 0.3067 | LB: 1.1119 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.408 | LR: 2.00e-04 +[2026-04-25 22:50:29] Epoch 1 | Step 2440 | Loss: 0.3122 | LM: 0.3067 | LB: 1.1118 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:50:36] Epoch 1 | Step 2450 | Loss: 0.3122 | LM: 0.3066 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:50:44] Epoch 1 | Step 2460 | Loss: 0.3122 | LM: 0.3066 | LB: 1.1116 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:50:51] Epoch 1 | Step 2470 | Loss: 0.3122 | LM: 0.3065 | LB: 1.1115 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:50:58] Epoch 1 | Step 2480 | Loss: 0.3122 | LM: 0.3064 | LB: 1.1114 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:05] Epoch 1 | Step 2490 | Loss: 0.3121 | LM: 0.3066 | LB: 1.1113 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:12] Epoch 1 | Step 2500 | Loss: 0.3120 | LM: 0.3066 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:19] Epoch 1 | Step 2510 | Loss: 0.3120 | LM: 0.3069 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.431/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:26] Epoch 1 | Step 2520 | Loss: 0.3118 | LM: 0.3065 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:33] Epoch 1 | Step 2530 | Loss: 0.3117 | LM: 0.3065 | LB: 1.1112 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:41] Epoch 1 | Step 2540 | Loss: 0.3116 | LM: 0.3064 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:48] Epoch 1 | Step 2550 | Loss: 0.3114 | LM: 0.3058 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:51:55] Epoch 1 | Step 2560 | Loss: 0.3114 | LM: 0.3061 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:52:02] Epoch 1 | Step 2570 | Loss: 0.3115 | LM: 0.3062 | LB: 1.1111 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.407 | LR: 2.00e-04 +[2026-04-25 22:52:09] Epoch 1 | Step 2580 | Loss: 0.3116 | LM: 0.3063 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:16] Epoch 1 | Step 2590 | Loss: 0.3117 | LM: 0.3061 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:23] Epoch 1 | Step 2600 | Loss: 0.3117 | LM: 0.3063 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:30] Epoch 1 | Step 2610 | Loss: 0.3117 | LM: 0.3059 | LB: 1.1109 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:37] Epoch 1 | Step 2620 | Loss: 0.3117 | LM: 0.3059 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:44] Epoch 1 | Step 2630 | Loss: 0.3116 | LM: 0.3059 | LB: 1.1108 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:52] Epoch 1 | Step 2640 | Loss: 0.3117 | LM: 0.3056 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:52:59] Epoch 1 | Step 2650 | Loss: 0.3115 | LM: 0.3055 | LB: 1.1107 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:06] Epoch 1 | Step 2660 | Loss: 0.3115 | LM: 0.3057 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:13] Epoch 1 | Step 2670 | Loss: 0.3115 | LM: 0.3060 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:20] Epoch 1 | Step 2680 | Loss: 0.3114 | LM: 0.3060 | LB: 1.1106 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:27] Epoch 1 | Step 2690 | Loss: 0.3114 | LM: 0.3062 | LB: 1.1105 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:34] Epoch 1 | Step 2700 | Loss: 0.3113 | LM: 0.3062 | LB: 1.1104 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:41] Epoch 1 | Step 2710 | Loss: 0.3112 | LM: 0.3059 | LB: 1.1103 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.430/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:49] Epoch 1 | Step 2720 | Loss: 0.3112 | LM: 0.3058 | LB: 1.1102 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:53:56] Epoch 1 | Step 2730 | Loss: 0.3111 | LM: 0.3058 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:54:03] Epoch 1 | Step 2740 | Loss: 0.3112 | LM: 0.3058 | LB: 1.1101 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.406 | LR: 2.00e-04 +[2026-04-25 22:54:10] Epoch 1 | Step 2750 | Loss: 0.3112 | LM: 0.3058 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:17] Epoch 1 | Step 2760 | Loss: 0.3112 | LM: 0.3054 | LB: 1.1099 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:24] Epoch 1 | Step 2770 | Loss: 0.3111 | LM: 0.3054 | LB: 1.1098 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:31] Epoch 1 | Step 2780 | Loss: 0.3111 | LM: 0.3052 | LB: 1.1097 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:38] Epoch 1 | Step 2790 | Loss: 0.3111 | LM: 0.3052 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:45] Epoch 1 | Step 2800 | Loss: 0.3110 | LM: 0.3052 | LB: 1.1096 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:54:52] Epoch 1 | Step 2810 | Loss: 0.3111 | LM: 0.3052 | LB: 1.1095 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:55:00] Epoch 1 | Step 2820 | Loss: 0.3111 | LM: 0.3053 | LB: 1.1094 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:55:07] Epoch 1 | Step 2830 | Loss: 0.3111 | LM: 0.3053 | LB: 1.1093 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:55:14] Epoch 1 | Step 2840 | Loss: 0.3112 | LM: 0.3055 | LB: 1.1093 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.429/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:55:21] Epoch 1 | Step 2850 | Loss: 0.3113 | LM: 0.3059 | LB: 1.1093 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.405 | LR: 2.00e-04 +[2026-04-25 22:55:28] Epoch 1 | Step 2860 | Loss: 0.3113 | LM: 0.3059 | LB: 1.1092 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:55:35] Epoch 1 | Step 2870 | Loss: 0.3112 | LM: 0.3059 | LB: 1.1092 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:55:42] Epoch 1 | Step 2880 | Loss: 0.3112 | LM: 0.3058 | LB: 1.1091 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:55:49] Epoch 1 | Step 2890 | Loss: 0.3112 | LM: 0.3056 | LB: 1.1090 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:55:57] Epoch 1 | Step 2900 | Loss: 0.3110 | LM: 0.3054 | LB: 1.1089 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:04] Epoch 1 | Step 2910 | Loss: 0.3111 | LM: 0.3054 | LB: 1.1088 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:11] Epoch 1 | Step 2920 | Loss: 0.3112 | LM: 0.3057 | LB: 1.1087 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:18] Epoch 1 | Step 2930 | Loss: 0.3111 | LM: 0.3054 | LB: 1.1087 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:25] Epoch 1 | Step 2940 | Loss: 0.3110 | LM: 0.3054 | LB: 1.1086 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:33] Epoch 1 | Step 2950 | Loss: 0.3111 | LM: 0.3055 | LB: 1.1085 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:40] Epoch 1 | Step 2960 | Loss: 0.3111 | LM: 0.3056 | LB: 1.1084 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:47] Epoch 1 | Step 2970 | Loss: 0.3111 | LM: 0.3058 | LB: 1.1083 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.404 | LR: 2.00e-04 +[2026-04-25 22:56:54] Epoch 1 | Step 2980 | Loss: 0.3110 | LM: 0.3057 | LB: 1.1083 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.428/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:01] Epoch 1 | Step 2990 | Loss: 0.3111 | LM: 0.3057 | LB: 1.1082 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:08] Epoch 1 | Step 3000 | Loss: 0.3111 | LM: 0.3057 | LB: 1.1082 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:16] Epoch 1 | Step 3010 | Loss: 0.3111 | LM: 0.3057 | LB: 1.1081 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:23] Epoch 1 | Step 3020 | Loss: 0.3109 | LM: 0.3056 | LB: 1.1081 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:30] Epoch 1 | Step 3030 | Loss: 0.3109 | LM: 0.3056 | LB: 1.1081 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:37] Epoch 1 | Step 3040 | Loss: 0.3108 | LM: 0.3054 | LB: 1.1080 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:44] Epoch 1 | Step 3050 | Loss: 0.3107 | LM: 0.3051 | LB: 1.1080 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:51] Epoch 1 | Step 3060 | Loss: 0.3107 | LM: 0.3055 | LB: 1.1080 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:57:59] Epoch 1 | Step 3070 | Loss: 0.3106 | LM: 0.3055 | LB: 1.1079 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:06] Epoch 1 | Step 3080 | Loss: 0.3106 | LM: 0.3053 | LB: 1.1078 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:13] Epoch 1 | Step 3090 | Loss: 0.3105 | LM: 0.3050 | LB: 1.1078 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:20] Epoch 1 | Step 3100 | Loss: 0.3105 | LM: 0.3050 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:27] Epoch 1 | Step 3110 | Loss: 0.3104 | LM: 0.3049 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:34] Epoch 1 | Step 3120 | Loss: 0.3106 | LM: 0.3049 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:41] Epoch 1 | Step 3130 | Loss: 0.3106 | LM: 0.3049 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:49] Epoch 1 | Step 3140 | Loss: 0.3106 | LM: 0.3047 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:58:56] Epoch 1 | Step 3150 | Loss: 0.3107 | LM: 0.3050 | LB: 1.1076 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:59:03] Epoch 1 | Step 3160 | Loss: 0.3107 | LM: 0.3048 | LB: 1.1076 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:59:10] Epoch 1 | Step 3170 | Loss: 0.3107 | LM: 0.3050 | LB: 1.1075 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:59:17] Epoch 1 | Step 3180 | Loss: 0.3108 | LM: 0.3052 | LB: 1.1075 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.403 | LR: 2.00e-04 +[2026-04-25 22:59:24] Epoch 1 | Step 3190 | Loss: 0.3106 | LM: 0.3049 | LB: 1.1074 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 22:59:31] Epoch 1 | Step 3200 | Loss: 0.3105 | LM: 0.3046 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 22:59:38] Epoch 1 | Step 3210 | Loss: 0.3105 | LM: 0.3044 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 22:59:46] Epoch 1 | Step 3220 | Loss: 0.3103 | LM: 0.3043 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 22:59:53] Epoch 1 | Step 3230 | Loss: 0.3104 | LM: 0.3042 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:00] Epoch 1 | Step 3240 | Loss: 0.3103 | LM: 0.3041 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:07] Epoch 1 | Step 3250 | Loss: 0.3103 | LM: 0.3041 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:14] Epoch 1 | Step 3260 | Loss: 0.3103 | LM: 0.3039 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:21] Epoch 1 | Step 3270 | Loss: 0.3103 | LM: 0.3040 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:28] Epoch 1 | Step 3280 | Loss: 0.3102 | LM: 0.3038 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:35] Epoch 1 | Step 3290 | Loss: 0.3101 | LM: 0.3038 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:43] Epoch 1 | Step 3300 | Loss: 0.3101 | LM: 0.3040 | LB: 1.1073 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.427/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:50] Epoch 1 | Step 3310 | Loss: 0.3101 | LM: 0.3039 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:00:57] Epoch 1 | Step 3320 | Loss: 0.3101 | LM: 0.3039 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:04] Epoch 1 | Step 3330 | Loss: 0.3101 | LM: 0.3039 | LB: 1.1071 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:11] Epoch 1 | Step 3340 | Loss: 0.3101 | LM: 0.3042 | LB: 1.1071 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:18] Epoch 1 | Step 3350 | Loss: 0.3100 | LM: 0.3041 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:25] Epoch 1 | Step 3360 | Loss: 0.3100 | LM: 0.3039 | LB: 1.1071 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:33] Epoch 1 | Step 3370 | Loss: 0.3100 | LM: 0.3038 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:40] Epoch 1 | Step 3380 | Loss: 0.3099 | LM: 0.3037 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:47] Epoch 1 | Step 3390 | Loss: 0.3100 | LM: 0.3038 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:01:54] Epoch 1 | Step 3400 | Loss: 0.3102 | LM: 0.3039 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:01] Epoch 1 | Step 3410 | Loss: 0.3103 | LM: 0.3040 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:08] Epoch 1 | Step 3420 | Loss: 0.3102 | LM: 0.3042 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:15] Epoch 1 | Step 3430 | Loss: 0.3102 | LM: 0.3043 | LB: 1.1068 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:22] Epoch 1 | Step 3440 | Loss: 0.3102 | LM: 0.3044 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:29] Epoch 1 | Step 3450 | Loss: 0.3102 | LM: 0.3045 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:36] Epoch 1 | Step 3460 | Loss: 0.3101 | LM: 0.3042 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:44] Epoch 1 | Step 3470 | Loss: 0.3101 | LM: 0.3040 | LB: 1.1066 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.402 | LR: 2.00e-04 +[2026-04-25 23:02:51] Epoch 1 | Step 3480 | Loss: 0.3101 | LM: 0.3042 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:02:58] Epoch 1 | Step 3490 | Loss: 0.3100 | LM: 0.3042 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:05] Epoch 1 | Step 3500 | Loss: 0.3099 | LM: 0.3040 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:12] Epoch 1 | Step 3510 | Loss: 0.3099 | LM: 0.3041 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:19] Epoch 1 | Step 3520 | Loss: 0.3099 | LM: 0.3041 | LB: 1.1063 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:26] Epoch 1 | Step 3530 | Loss: 0.3099 | LM: 0.3040 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:33] Epoch 1 | Step 3540 | Loss: 0.3099 | LM: 0.3037 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.426/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:40] Epoch 1 | Step 3550 | Loss: 0.3099 | LM: 0.3038 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:47] Epoch 1 | Step 3560 | Loss: 0.3099 | LM: 0.3039 | LB: 1.1061 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:03:54] Epoch 1 | Step 3570 | Loss: 0.3099 | LM: 0.3039 | LB: 1.1060 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:02] Epoch 1 | Step 3580 | Loss: 0.3099 | LM: 0.3039 | LB: 1.1060 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:09] Epoch 1 | Step 3590 | Loss: 0.3099 | LM: 0.3039 | LB: 1.1059 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:16] Epoch 1 | Step 3600 | Loss: 0.3098 | LM: 0.3039 | LB: 1.1059 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:23] Epoch 1 | Step 3610 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1058 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:30] Epoch 1 | Step 3620 | Loss: 0.3097 | LM: 0.3035 | LB: 1.1057 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:37] Epoch 1 | Step 3630 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1057 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:44] Epoch 1 | Step 3640 | Loss: 0.3098 | LM: 0.3039 | LB: 1.1056 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.401 | LR: 2.00e-04 +[2026-04-25 23:04:51] Epoch 1 | Step 3650 | Loss: 0.3099 | LM: 0.3041 | LB: 1.1056 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:04:59] Epoch 1 | Step 3660 | Loss: 0.3098 | LM: 0.3041 | LB: 1.1056 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:06] Epoch 1 | Step 3670 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1055 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:13] Epoch 1 | Step 3680 | Loss: 0.3097 | LM: 0.3038 | LB: 1.1054 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:20] Epoch 1 | Step 3690 | Loss: 0.3098 | LM: 0.3039 | LB: 1.1054 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:27] Epoch 1 | Step 3700 | Loss: 0.3097 | LM: 0.3039 | LB: 1.1053 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:34] Epoch 1 | Step 3710 | Loss: 0.3096 | LM: 0.3037 | LB: 1.1053 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:41] Epoch 1 | Step 3720 | Loss: 0.3097 | LM: 0.3039 | LB: 1.1053 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:48] Epoch 1 | Step 3730 | Loss: 0.3097 | LM: 0.3039 | LB: 1.1053 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:05:55] Epoch 1 | Step 3740 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1052 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.425/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:03] Epoch 1 | Step 3750 | Loss: 0.3096 | LM: 0.3036 | LB: 1.1051 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:10] Epoch 1 | Step 3760 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1051 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:17] Epoch 1 | Step 3770 | Loss: 0.3097 | LM: 0.3038 | LB: 1.1051 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:24] Epoch 1 | Step 3780 | Loss: 0.3097 | LM: 0.3037 | LB: 1.1051 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:31] Epoch 1 | Step 3790 | Loss: 0.3097 | LM: 0.3038 | LB: 1.1050 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:38] Epoch 1 | Step 3800 | Loss: 0.3098 | LM: 0.3038 | LB: 1.1050 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:45] Epoch 1 | Step 3810 | Loss: 0.3096 | LM: 0.3036 | LB: 1.1049 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:06:52] Epoch 1 | Step 3820 | Loss: 0.3096 | LM: 0.3034 | LB: 1.1049 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:00] Epoch 1 | Step 3830 | Loss: 0.3096 | LM: 0.3035 | LB: 1.1049 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:07] Epoch 1 | Step 3840 | Loss: 0.3096 | LM: 0.3037 | LB: 1.1049 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:14] Epoch 1 | Step 3850 | Loss: 0.3095 | LM: 0.3036 | LB: 1.1048 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:21] Epoch 1 | Step 3860 | Loss: 0.3095 | LM: 0.3035 | LB: 1.1048 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:28] Epoch 1 | Step 3870 | Loss: 0.3095 | LM: 0.3033 | LB: 1.1047 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.400 | LR: 2.00e-04 +[2026-04-25 23:07:35] Epoch 1 | Step 3880 | Loss: 0.3094 | LM: 0.3031 | LB: 1.1047 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:07:42] Epoch 1 | Step 3890 | Loss: 0.3093 | LM: 0.3032 | LB: 1.1047 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:07:49] Epoch 1 | Step 3900 | Loss: 0.3093 | LM: 0.3032 | LB: 1.1047 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:07:57] Epoch 1 | Step 3910 | Loss: 0.3094 | LM: 0.3032 | LB: 1.1046 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:04] Epoch 1 | Step 3920 | Loss: 0.3094 | LM: 0.3032 | LB: 1.1046 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:11] Epoch 1 | Step 3930 | Loss: 0.3094 | LM: 0.3032 | LB: 1.1046 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:18] Epoch 1 | Step 3940 | Loss: 0.3094 | LM: 0.3031 | LB: 1.1045 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:25] Epoch 1 | Step 3950 | Loss: 0.3093 | LM: 0.3028 | LB: 1.1045 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:32] Epoch 1 | Step 3960 | Loss: 0.3093 | LM: 0.3029 | LB: 1.1044 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:39] Epoch 1 | Step 3970 | Loss: 0.3092 | LM: 0.3029 | LB: 1.1044 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:46] Epoch 1 | Step 3980 | Loss: 0.3093 | LM: 0.3029 | LB: 1.1043 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 2.00e-04 +[2026-04-25 23:08:53] Epoch 1 | Step 3990 | Loss: 0.3092 | LM: 0.3031 | LB: 1.1043 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 1.99e-04 +[2026-04-25 23:09:00] Epoch 1 | Step 4000 | Loss: 0.3093 | LM: 0.3031 | LB: 1.1042 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 1.99e-04 +[2026-04-25 23:09:01] Validation | Batch 10/42 | Loss: 0.2947 | LM_LOSS: 0.2839 | LB_LOSS: 1.0817 +[2026-04-25 23:09:02] Validation | Batch 20/42 | Loss: 0.3149 | LM_LOSS: 0.3040 | LB_LOSS: 1.0854 +[2026-04-25 23:09:04] Validation | Batch 30/42 | Loss: 0.3056 | LM_LOSS: 0.2948 | LB_LOSS: 1.0831 +[2026-04-25 23:09:05] Validation | Batch 40/42 | Loss: 0.3105 | LM_LOSS: 0.2997 | LB_LOSS: 1.0823 +[2026-04-25 23:09:05] Validation | Batch 42/42 | Loss: 0.3105 | LM_LOSS: 0.2996 | LB_LOSS: 1.0825 +[2026-04-25 23:09:05] Validation | Loss: 0.3105 | LM_LOSS: 0.2996 | LB_LOSS: 1.0825 | PPL: 1.35 | Time: 4.70s +[2026-04-25 23:09:08] New best model saved! Val loss: 0.3105 +[2026-04-25 23:09:16] Epoch 1 | Step 4010 | Loss: 0.3092 | LM: 0.3031 | LB: 1.1041 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.424/SR1: 0.399 | LR: 1.99e-04 +[2026-04-25 23:09:23] Epoch 1 | Step 4020 | Loss: 0.3092 | LM: 0.3031 | LB: 1.1041 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.98e-04 +[2026-04-25 23:09:30] Epoch 1 | Step 4030 | Loss: 0.3091 | LM: 0.3030 | LB: 1.1041 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.97e-04 +[2026-04-25 23:09:37] Epoch 1 | Step 4040 | Loss: 0.3090 | LM: 0.3029 | LB: 1.1040 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.97e-04 +[2026-04-25 23:09:44] Epoch 1 | Step 4050 | Loss: 0.3089 | LM: 0.3029 | LB: 1.1040 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.96e-04 +[2026-04-25 23:09:51] Epoch 1 | Step 4060 | Loss: 0.3088 | LM: 0.3027 | LB: 1.1039 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.95e-04 +[2026-04-25 23:09:58] Epoch 1 | Step 4070 | Loss: 0.3088 | LM: 0.3028 | LB: 1.1039 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.94e-04 +[2026-04-25 23:10:05] Epoch 1 | Step 4080 | Loss: 0.3088 | LM: 0.3029 | LB: 1.1038 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.93e-04 +[2026-04-25 23:10:12] Epoch 1 | Step 4090 | Loss: 0.3088 | LM: 0.3029 | LB: 1.1038 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.92e-04 +[2026-04-25 23:10:19] Epoch 1 | Step 4100 | Loss: 0.3089 | LM: 0.3029 | LB: 1.1038 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.399 | LR: 1.91e-04 +[2026-04-25 23:10:27] Epoch 1 | Step 4110 | Loss: 0.3089 | LM: 0.3030 | LB: 1.1037 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.89e-04 +[2026-04-25 23:10:34] Epoch 1 | Step 4120 | Loss: 0.3090 | LM: 0.3031 | LB: 1.1037 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.88e-04 +[2026-04-25 23:10:41] Epoch 1 | Step 4130 | Loss: 0.3089 | LM: 0.3029 | LB: 1.1036 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.86e-04 +[2026-04-25 23:10:48] Epoch 1 | Step 4140 | Loss: 0.3089 | LM: 0.3030 | LB: 1.1036 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.85e-04 +[2026-04-25 23:10:55] Epoch 1 | Step 4150 | Loss: 0.3091 | LM: 0.3032 | LB: 1.1036 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.83e-04 +[2026-04-25 23:11:02] Epoch 1 | Step 4160 | Loss: 0.3092 | LM: 0.3034 | LB: 1.1035 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.82e-04 +[2026-04-25 23:11:09] Epoch 1 | Step 4170 | Loss: 0.3091 | LM: 0.3035 | LB: 1.1036 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.80e-04 +[2026-04-25 23:11:17] Epoch 1 | Step 4180 | Loss: 0.3091 | LM: 0.3036 | LB: 1.1035 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.78e-04 +[2026-04-25 23:11:24] Epoch 1 | Step 4190 | Loss: 0.3091 | LM: 0.3036 | LB: 1.1035 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.76e-04 +[2026-04-25 23:11:31] Epoch 1 | Step 4200 | Loss: 0.3092 | LM: 0.3037 | LB: 1.1035 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.354 | HR1: 0.423/SR1: 0.398 | LR: 1.74e-04 +[2026-04-25 23:11:38] Epoch 1 | Step 4210 | Loss: 0.3092 | LM: 0.3037 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.72e-04 +[2026-04-25 23:11:45] Epoch 1 | Step 4220 | Loss: 0.3093 | LM: 0.3039 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.70e-04 +[2026-04-25 23:11:52] Epoch 1 | Step 4230 | Loss: 0.3094 | LM: 0.3039 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.68e-04 +[2026-04-25 23:11:59] Epoch 1 | Step 4240 | Loss: 0.3095 | LM: 0.3041 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.66e-04 +[2026-04-25 23:12:07] Epoch 1 | Step 4250 | Loss: 0.3095 | LM: 0.3041 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.63e-04 +[2026-04-25 23:12:14] Epoch 1 | Step 4260 | Loss: 0.3094 | LM: 0.3038 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.61e-04 +[2026-04-25 23:12:21] Epoch 1 | Step 4270 | Loss: 0.3095 | LM: 0.3038 | LB: 1.1033 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.59e-04 +[2026-04-25 23:12:28] Epoch 1 | Step 4280 | Loss: 0.3095 | LM: 0.3037 | LB: 1.1033 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.56e-04 +[2026-04-25 23:12:36] Epoch 1 | Step 4290 | Loss: 0.3094 | LM: 0.3035 | LB: 1.1033 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.54e-04 +[2026-04-25 23:12:43] Epoch 1 | Step 4300 | Loss: 0.3094 | LM: 0.3035 | LB: 1.1033 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.51e-04 +[2026-04-25 23:12:51] Epoch 1 | Step 4310 | Loss: 0.3094 | LM: 0.3034 | LB: 1.1032 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.49e-04 +[2026-04-25 23:12:58] Epoch 1 | Step 4320 | Loss: 0.3094 | LM: 0.3033 | LB: 1.1032 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.46e-04 +[2026-04-25 23:13:05] Epoch 1 | Step 4330 | Loss: 0.3094 | LM: 0.3034 | LB: 1.1032 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.43e-04 +[2026-04-25 23:13:12] Epoch 1 | Step 4340 | Loss: 0.3094 | LM: 0.3036 | LB: 1.1032 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.41e-04 +[2026-04-25 23:13:20] Epoch 1 | Step 4350 | Loss: 0.3093 | LM: 0.3036 | LB: 1.1031 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.38e-04 +[2026-04-25 23:13:27] Epoch 1 | Step 4360 | Loss: 0.3093 | LM: 0.3034 | LB: 1.1031 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.35e-04 +[2026-04-25 23:13:34] Epoch 1 | Step 4370 | Loss: 0.3093 | LM: 0.3036 | LB: 1.1030 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.32e-04 +[2026-04-25 23:13:41] Epoch 1 | Step 4380 | Loss: 0.3092 | LM: 0.3036 | LB: 1.1030 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.30e-04 +[2026-04-25 23:13:48] Epoch 1 | Step 4390 | Loss: 0.3093 | LM: 0.3035 | LB: 1.1030 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.27e-04 +[2026-04-25 23:13:55] Epoch 1 | Step 4400 | Loss: 0.3092 | LM: 0.3033 | LB: 1.1029 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.24e-04 +[2026-04-25 23:14:03] Epoch 1 | Step 4410 | Loss: 0.3091 | LM: 0.3032 | LB: 1.1029 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.423/SR1: 0.398 | LR: 1.21e-04 +[2026-04-25 23:14:11] Epoch 1 | Step 4420 | Loss: 0.3092 | LM: 0.3033 | LB: 1.1029 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.398 | LR: 1.18e-04 +[2026-04-25 23:14:19] Epoch 1 | Step 4430 | Loss: 0.3091 | LM: 0.3032 | LB: 1.1029 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.398 | LR: 1.16e-04 +[2026-04-25 23:14:26] Epoch 1 | Step 4440 | Loss: 0.3092 | LM: 0.3032 | LB: 1.1028 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.398 | LR: 1.13e-04 +[2026-04-25 23:14:34] Epoch 1 | Step 4450 | Loss: 0.3091 | LM: 0.3030 | LB: 1.1028 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 1.10e-04 +[2026-04-25 23:14:41] Epoch 1 | Step 4460 | Loss: 0.3092 | LM: 0.3030 | LB: 1.1027 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 1.07e-04 +[2026-04-25 23:14:48] Epoch 1 | Step 4470 | Loss: 0.3091 | LM: 0.3030 | LB: 1.1027 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 1.04e-04 +[2026-04-25 23:14:55] Epoch 1 | Step 4480 | Loss: 0.3090 | LM: 0.3029 | LB: 1.1027 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 1.01e-04 +[2026-04-25 23:15:02] Epoch 1 | Step 4490 | Loss: 0.3089 | LM: 0.3029 | LB: 1.1026 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 9.84e-05 +[2026-04-25 23:15:10] Epoch 1 | Step 4500 | Loss: 0.3089 | LM: 0.3028 | LB: 1.1026 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 9.55e-05 +[2026-04-25 23:15:17] Epoch 1 | Step 4510 | Loss: 0.3088 | LM: 0.3027 | LB: 1.1026 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 9.27e-05 +[2026-04-25 23:15:24] Epoch 1 | Step 4520 | Loss: 0.3087 | LM: 0.3027 | LB: 1.1026 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 8.99e-05 +[2026-04-25 23:15:31] Epoch 1 | Step 4530 | Loss: 0.3087 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 8.72e-05 +[2026-04-25 23:15:38] Epoch 1 | Step 4540 | Loss: 0.3087 | LM: 0.3026 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 8.44e-05 +[2026-04-25 23:15:45] Epoch 1 | Step 4550 | Loss: 0.3086 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 8.17e-05 +[2026-04-25 23:15:53] Epoch 1 | Step 4560 | Loss: 0.3086 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 7.90e-05 +[2026-04-25 23:16:00] Epoch 1 | Step 4570 | Loss: 0.3086 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 7.63e-05 +[2026-04-25 23:16:07] Epoch 1 | Step 4580 | Loss: 0.3085 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 7.37e-05 +[2026-04-25 23:16:14] Epoch 1 | Step 4590 | Loss: 0.3085 | LM: 0.3025 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 7.11e-05 +[2026-04-25 23:16:21] Epoch 1 | Step 4600 | Loss: 0.3084 | LM: 0.3023 | LB: 1.1024 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 6.85e-05 +[2026-04-25 23:16:28] Epoch 1 | Step 4610 | Loss: 0.3084 | LM: 0.3022 | LB: 1.1024 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 6.60e-05 +[2026-04-25 23:16:35] Epoch 1 | Step 4620 | Loss: 0.3084 | LM: 0.3021 | LB: 1.1024 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 6.35e-05 +[2026-04-25 23:16:43] Epoch 1 | Step 4630 | Loss: 0.3084 | LM: 0.3022 | LB: 1.1023 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 6.11e-05 +[2026-04-25 23:16:50] Epoch 1 | Step 4640 | Loss: 0.3084 | LM: 0.3022 | LB: 1.1023 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 5.87e-05 +[2026-04-25 23:16:57] Epoch 1 | Step 4650 | Loss: 0.3084 | LM: 0.3021 | LB: 1.1023 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 5.64e-05 +[2026-04-25 23:17:04] Epoch 1 | Step 4660 | Loss: 0.3084 | LM: 0.3020 | LB: 1.1023 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 5.41e-05 +[2026-04-25 23:17:11] Epoch 1 | Step 4670 | Loss: 0.3083 | LM: 0.3021 | LB: 1.1022 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 5.19e-05 +[2026-04-25 23:17:18] Epoch 1 | Step 4680 | Loss: 0.3083 | LM: 0.3020 | LB: 1.1022 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 4.98e-05 +[2026-04-25 23:17:26] Epoch 1 | Step 4690 | Loss: 0.3083 | LM: 0.3019 | LB: 1.1022 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 4.77e-05 +[2026-04-25 23:17:33] Epoch 1 | Step 4700 | Loss: 0.3083 | LM: 0.3020 | LB: 1.1021 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 4.56e-05 +[2026-04-25 23:17:40] Epoch 1 | Step 4710 | Loss: 0.3083 | LM: 0.3019 | LB: 1.1021 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 4.37e-05 +[2026-04-25 23:17:47] Epoch 1 | Step 4720 | Loss: 0.3083 | LM: 0.3020 | LB: 1.1021 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 4.18e-05 +[2026-04-25 23:17:54] Epoch 1 | Step 4730 | Loss: 0.3082 | LM: 0.3019 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 3.99e-05 +[2026-04-25 23:18:01] Epoch 1 | Step 4740 | Loss: 0.3082 | LM: 0.3019 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 3.82e-05 +[2026-04-25 23:18:08] Epoch 1 | Step 4750 | Loss: 0.3082 | LM: 0.3018 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 3.65e-05 +[2026-04-25 23:18:16] Epoch 1 | Step 4760 | Loss: 0.3082 | LM: 0.3017 | LB: 1.1019 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.397 | LR: 3.49e-05 +[2026-04-25 23:18:23] Epoch 1 | Step 4770 | Loss: 0.3081 | LM: 0.3015 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 3.33e-05 +[2026-04-25 23:18:30] Epoch 1 | Step 4780 | Loss: 0.3081 | LM: 0.3016 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 3.19e-05 +[2026-04-25 23:18:37] Epoch 1 | Step 4790 | Loss: 0.3081 | LM: 0.3015 | LB: 1.1019 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 3.05e-05 +[2026-04-25 23:18:44] Epoch 1 | Step 4800 | Loss: 0.3080 | LM: 0.3014 | LB: 1.1019 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.92e-05 +[2026-04-25 23:18:51] Epoch 1 | Step 4810 | Loss: 0.3079 | LM: 0.3013 | LB: 1.1019 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.80e-05 +[2026-04-25 23:18:58] Epoch 1 | Step 4820 | Loss: 0.3079 | LM: 0.3012 | LB: 1.1018 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.68e-05 +[2026-04-25 23:19:06] Epoch 1 | Step 4830 | Loss: 0.3077 | LM: 0.3009 | LB: 1.1018 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.58e-05 +[2026-04-25 23:19:13] Epoch 1 | Step 4840 | Loss: 0.3078 | LM: 0.3009 | LB: 1.1018 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.48e-05 +[2026-04-25 23:19:20] Epoch 1 | Step 4850 | Loss: 0.3078 | LM: 0.3009 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.422/SR1: 0.396 | LR: 2.39e-05 +[2026-04-25 23:19:27] Epoch 1 | Step 4860 | Loss: 0.3078 | LM: 0.3009 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.32e-05 +[2026-04-25 23:19:34] Epoch 1 | Step 4870 | Loss: 0.3079 | LM: 0.3009 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.24e-05 +[2026-04-25 23:19:41] Epoch 1 | Step 4880 | Loss: 0.3079 | LM: 0.3009 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.18e-05 +[2026-04-25 23:19:48] Epoch 1 | Step 4890 | Loss: 0.3078 | LM: 0.3010 | LB: 1.1016 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.13e-05 +[2026-04-25 23:19:56] Epoch 1 | Step 4900 | Loss: 0.3078 | LM: 0.3008 | LB: 1.1016 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.09e-05 +[2026-04-25 23:20:03] Epoch 1 | Step 4910 | Loss: 0.3077 | LM: 0.3007 | LB: 1.1016 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.05e-05 +[2026-04-25 23:20:10] Epoch 1 | Step 4920 | Loss: 0.3077 | LM: 0.3007 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.03e-05 +[2026-04-25 23:20:17] Epoch 1 | Step 4930 | Loss: 0.3077 | LM: 0.3007 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.01e-05 +[2026-04-25 23:20:24] Epoch 1 | Step 4940 | Loss: 0.3076 | LM: 0.3007 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:20:31] Epoch 1 | Step 4950 | Loss: 0.3076 | LM: 0.3008 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:20:39] Epoch 1 | Step 4960 | Loss: 0.3076 | LM: 0.3008 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:20:46] Epoch 1 | Step 4970 | Loss: 0.3076 | LM: 0.3007 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:20:53] Epoch 1 | Step 4980 | Loss: 0.3076 | LM: 0.3007 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:00] Epoch 1 | Step 4990 | Loss: 0.3075 | LM: 0.3006 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:07] Epoch 1 | Step 5000 | Loss: 0.3076 | LM: 0.3007 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:14] Epoch 1 | Step 5010 | Loss: 0.3075 | LM: 0.3008 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:21] Epoch 1 | Step 5020 | Loss: 0.3074 | LM: 0.3005 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:28] Epoch 1 | Step 5030 | Loss: 0.3074 | LM: 0.3006 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:36] Epoch 1 | Step 5040 | Loss: 0.3074 | LM: 0.3004 | LB: 1.1013 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:43] Epoch 1 | Step 5050 | Loss: 0.3074 | LM: 0.3004 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:51] Epoch 1 | Step 5060 | Loss: 0.3073 | LM: 0.3003 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:21:59] Epoch 1 | Step 5070 | Loss: 0.3073 | LM: 0.3002 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:06] Epoch 1 | Step 5080 | Loss: 0.3074 | LM: 0.3002 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:14] Epoch 1 | Step 5090 | Loss: 0.3074 | LM: 0.3002 | LB: 1.1011 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:22] Epoch 1 | Step 5100 | Loss: 0.3073 | LM: 0.3002 | LB: 1.1011 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:29] Epoch 1 | Step 5110 | Loss: 0.3073 | LM: 0.3002 | LB: 1.1011 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:36] Epoch 1 | Step 5120 | Loss: 0.3073 | LM: 0.3003 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:43] Epoch 1 | Step 5130 | Loss: 0.3072 | LM: 0.3002 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.396 | LR: 2.00e-05 +[2026-04-25 23:22:51] Epoch 1 | Step 5140 | Loss: 0.3072 | LM: 0.3002 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:22:58] Epoch 1 | Step 5150 | Loss: 0.3071 | LM: 0.3001 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:05] Epoch 1 | Step 5160 | Loss: 0.3070 | LM: 0.3000 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:12] Epoch 1 | Step 5170 | Loss: 0.3070 | LM: 0.3000 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:19] Epoch 1 | Step 5180 | Loss: 0.3069 | LM: 0.3000 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:26] Epoch 1 | Step 5190 | Loss: 0.3069 | LM: 0.3000 | LB: 1.1008 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:33] Epoch 1 | Step 5200 | Loss: 0.3069 | LM: 0.3000 | LB: 1.1008 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:40] Epoch 1 | Step 5210 | Loss: 0.3069 | LM: 0.3000 | LB: 1.1008 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:47] Epoch 1 | Step 5220 | Loss: 0.3069 | LM: 0.2999 | LB: 1.1008 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:23:55] Epoch 1 | Step 5230 | Loss: 0.3069 | LM: 0.2997 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:02] Epoch 1 | Step 5240 | Loss: 0.3069 | LM: 0.2997 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:09] Epoch 1 | Step 5250 | Loss: 0.3069 | LM: 0.2998 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:16] Epoch 1 | Step 5260 | Loss: 0.3069 | LM: 0.2998 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:23] Epoch 1 | Step 5270 | Loss: 0.3069 | LM: 0.2996 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.421/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:30] Epoch 1 | Step 5280 | Loss: 0.3068 | LM: 0.2995 | LB: 1.1006 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:37] Epoch 1 | Step 5290 | Loss: 0.3067 | LM: 0.2995 | LB: 1.1006 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:44] Epoch 1 | Step 5300 | Loss: 0.3067 | LM: 0.2994 | LB: 1.1006 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:52] Epoch 1 | Step 5310 | Loss: 0.3068 | LM: 0.2994 | LB: 1.1006 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:24:59] Epoch 1 | Step 5320 | Loss: 0.3067 | LM: 0.2993 | LB: 1.1006 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:06] Epoch 1 | Step 5330 | Loss: 0.3067 | LM: 0.2993 | LB: 1.1005 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:13] Epoch 1 | Step 5340 | Loss: 0.3067 | LM: 0.2993 | LB: 1.1005 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:20] Epoch 1 | Step 5350 | Loss: 0.3066 | LM: 0.2993 | LB: 1.1005 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:27] Epoch 1 | Step 5360 | Loss: 0.3066 | LM: 0.2992 | LB: 1.1004 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:34] Epoch 1 | Step 5370 | Loss: 0.3066 | LM: 0.2991 | LB: 1.1004 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:41] Epoch 1 | Step 5380 | Loss: 0.3065 | LM: 0.2990 | LB: 1.1003 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:48] Epoch 1 | Step 5390 | Loss: 0.3064 | LM: 0.2988 | LB: 1.1003 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:25:56] Epoch 1 | Step 5400 | Loss: 0.3064 | LM: 0.2989 | LB: 1.1003 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:03] Epoch 1 | Step 5410 | Loss: 0.3064 | LM: 0.2988 | LB: 1.1003 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:10] Epoch 1 | Step 5420 | Loss: 0.3064 | LM: 0.2988 | LB: 1.1003 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:17] Epoch 1 | Step 5430 | Loss: 0.3063 | LM: 0.2988 | LB: 1.1002 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:24] Epoch 1 | Step 5440 | Loss: 0.3063 | LM: 0.2987 | LB: 1.1002 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:31] Epoch 1 | Step 5450 | Loss: 0.3064 | LM: 0.2986 | LB: 1.1002 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:38] Epoch 1 | Step 5460 | Loss: 0.3063 | LM: 0.2986 | LB: 1.1001 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:45] Epoch 1 | Step 5470 | Loss: 0.3063 | LM: 0.2985 | LB: 1.1001 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.395 | LR: 2.00e-05 +[2026-04-25 23:26:52] Epoch 1 | Step 5480 | Loss: 0.3063 | LM: 0.2984 | LB: 1.1001 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:00] Epoch 1 | Step 5490 | Loss: 0.3063 | LM: 0.2984 | LB: 1.1000 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:07] Epoch 1 | Step 5500 | Loss: 0.3063 | LM: 0.2985 | LB: 1.1000 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.353 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:14] Epoch 1 | Step 5510 | Loss: 0.3063 | LM: 0.2987 | LB: 1.1000 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:21] Epoch 1 | Step 5520 | Loss: 0.3063 | LM: 0.2986 | LB: 1.1000 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:28] Epoch 1 | Step 5530 | Loss: 0.3062 | LM: 0.2985 | LB: 1.0999 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:35] Epoch 1 | Step 5540 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0999 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:42] Epoch 1 | Step 5550 | Loss: 0.3061 | LM: 0.2983 | LB: 1.0999 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:50] Epoch 1 | Step 5560 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0999 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:27:57] Epoch 1 | Step 5570 | Loss: 0.3062 | LM: 0.2984 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:04] Epoch 1 | Step 5580 | Loss: 0.3061 | LM: 0.2983 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:11] Epoch 1 | Step 5590 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:18] Epoch 1 | Step 5600 | Loss: 0.3061 | LM: 0.2983 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:25] Epoch 1 | Step 5610 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:32] Epoch 1 | Step 5620 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:40] Epoch 1 | Step 5630 | Loss: 0.3061 | LM: 0.2984 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:47] Epoch 1 | Step 5640 | Loss: 0.3061 | LM: 0.2985 | LB: 1.0997 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:28:54] Epoch 1 | Step 5650 | Loss: 0.3061 | LM: 0.2985 | LB: 1.0997 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:01] Epoch 1 | Step 5660 | Loss: 0.3060 | LM: 0.2985 | LB: 1.0997 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:08] Epoch 1 | Step 5670 | Loss: 0.3060 | LM: 0.2984 | LB: 1.0997 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:15] Epoch 1 | Step 5680 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:22] Epoch 1 | Step 5690 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:30] Epoch 1 | Step 5700 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:37] Epoch 1 | Step 5710 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:44] Epoch 1 | Step 5720 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:51] Epoch 1 | Step 5730 | Loss: 0.3059 | LM: 0.2986 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:29:58] Epoch 1 | Step 5740 | Loss: 0.3059 | LM: 0.2985 | LB: 1.0995 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:06] Epoch 1 | Step 5750 | Loss: 0.3059 | LM: 0.2985 | LB: 1.0995 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:13] Epoch 1 | Step 5760 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0995 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.420/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:20] Epoch 1 | Step 5770 | Loss: 0.3060 | LM: 0.2985 | LB: 1.0995 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:27] Epoch 1 | Step 5780 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:34] Epoch 1 | Step 5790 | Loss: 0.3060 | LM: 0.2983 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:41] Epoch 1 | Step 5800 | Loss: 0.3060 | LM: 0.2984 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:48] Epoch 1 | Step 5810 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:30:56] Epoch 1 | Step 5820 | Loss: 0.3059 | LM: 0.2983 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:03] Epoch 1 | Step 5830 | Loss: 0.3059 | LM: 0.2983 | LB: 1.0994 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:10] Epoch 1 | Step 5840 | Loss: 0.3058 | LM: 0.2982 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:17] Epoch 1 | Step 5850 | Loss: 0.3058 | LM: 0.2984 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:24] Epoch 1 | Step 5860 | Loss: 0.3058 | LM: 0.2983 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:31] Epoch 1 | Step 5870 | Loss: 0.3058 | LM: 0.2982 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:38] Epoch 1 | Step 5880 | Loss: 0.3059 | LM: 0.2984 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:45] Epoch 1 | Step 5890 | Loss: 0.3058 | LM: 0.2983 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:31:52] Epoch 1 | Step 5900 | Loss: 0.3058 | LM: 0.2982 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:32:00] Epoch 1 | Step 5910 | Loss: 0.3058 | LM: 0.2983 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:32:07] Epoch 1 | Step 5920 | Loss: 0.3057 | LM: 0.2982 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:32:14] Epoch 1 | Step 5930 | Loss: 0.3058 | LM: 0.2983 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.358/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:32:21] Epoch 1 | Step 5940 | Loss: 0.3057 | LM: 0.2983 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.394 | LR: 2.00e-05 +[2026-04-25 23:32:28] Epoch 1 | Step 5950 | Loss: 0.3057 | LM: 0.2982 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:32:35] Epoch 1 | Step 5960 | Loss: 0.3057 | LM: 0.2982 | LB: 1.0992 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:32:42] Epoch 1 | Step 5970 | Loss: 0.3058 | LM: 0.2982 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:32:49] Epoch 1 | Step 5980 | Loss: 0.3057 | LM: 0.2982 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:32:56] Epoch 1 | Step 5990 | Loss: 0.3058 | LM: 0.2983 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:04] Epoch 1 | Step 6000 | Loss: 0.3057 | LM: 0.2983 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:05] Validation | Batch 10/42 | Loss: 0.2824 | LM_LOSS: 0.2715 | LB_LOSS: 1.0825 +[2026-04-25 23:33:06] Validation | Batch 20/42 | Loss: 0.3037 | LM_LOSS: 0.2928 | LB_LOSS: 1.0861 +[2026-04-25 23:33:07] Validation | Batch 30/42 | Loss: 0.2961 | LM_LOSS: 0.2852 | LB_LOSS: 1.0837 +[2026-04-25 23:33:08] Validation | Batch 40/42 | Loss: 0.3007 | LM_LOSS: 0.2899 | LB_LOSS: 1.0829 +[2026-04-25 23:33:08] Validation | Batch 42/42 | Loss: 0.3004 | LM_LOSS: 0.2896 | LB_LOSS: 1.0831 +[2026-04-25 23:33:08] Validation | Loss: 0.3004 | LM_LOSS: 0.2896 | LB_LOSS: 1.0831 | PPL: 1.33 | Time: 4.70s +[2026-04-25 23:33:11] New best model saved! Val loss: 0.3004 +[2026-04-25 23:33:18] Epoch 1 | Step 6010 | Loss: 0.3057 | LM: 0.2981 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:26] Epoch 1 | Step 6020 | Loss: 0.3057 | LM: 0.2980 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:33] Epoch 1 | Step 6030 | Loss: 0.3058 | LM: 0.2980 | LB: 1.0990 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:40] Epoch 1 | Step 6040 | Loss: 0.3058 | LM: 0.2980 | LB: 1.0990 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:47] Epoch 1 | Step 6050 | Loss: 0.3058 | LM: 0.2981 | LB: 1.0990 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:33:54] Epoch 1 | Step 6060 | Loss: 0.3058 | LM: 0.2981 | LB: 1.0990 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:01] Epoch 1 | Step 6070 | Loss: 0.3057 | LM: 0.2980 | LB: 1.0990 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:08] Epoch 1 | Step 6080 | Loss: 0.3057 | LM: 0.2979 | LB: 1.0989 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:15] Epoch 1 | Step 6090 | Loss: 0.3058 | LM: 0.2979 | LB: 1.0989 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:23] Epoch 1 | Step 6100 | Loss: 0.3058 | LM: 0.2979 | LB: 1.0989 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:30] Epoch 1 | Step 6110 | Loss: 0.3058 | LM: 0.2980 | LB: 1.0988 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:37] Epoch 1 | Step 6120 | Loss: 0.3058 | LM: 0.2979 | LB: 1.0988 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:44] Epoch 1 | Step 6130 | Loss: 0.3057 | LM: 0.2979 | LB: 1.0988 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:51] Epoch 1 | Step 6140 | Loss: 0.3056 | LM: 0.2977 | LB: 1.0988 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:34:58] Epoch 1 | Step 6150 | Loss: 0.3055 | LM: 0.2977 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:05] Epoch 1 | Step 6160 | Loss: 0.3055 | LM: 0.2976 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:12] Epoch 1 | Step 6170 | Loss: 0.3055 | LM: 0.2976 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:19] Epoch 1 | Step 6180 | Loss: 0.3055 | LM: 0.2976 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:26] Epoch 1 | Step 6190 | Loss: 0.3054 | LM: 0.2976 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:33] Epoch 1 | Step 6200 | Loss: 0.3054 | LM: 0.2975 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:41] Epoch 1 | Step 6210 | Loss: 0.3054 | LM: 0.2974 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:48] Epoch 1 | Step 6220 | Loss: 0.3054 | LM: 0.2975 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:35:55] Epoch 1 | Step 6230 | Loss: 0.3054 | LM: 0.2976 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:02] Epoch 1 | Step 6240 | Loss: 0.3054 | LM: 0.2975 | LB: 1.0987 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:09] Epoch 1 | Step 6250 | Loss: 0.3053 | LM: 0.2976 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:16] Epoch 1 | Step 6260 | Loss: 0.3053 | LM: 0.2976 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:23] Epoch 1 | Step 6270 | Loss: 0.3052 | LM: 0.2975 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:30] Epoch 1 | Step 6280 | Loss: 0.3052 | LM: 0.2973 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:37] Epoch 1 | Step 6290 | Loss: 0.3052 | LM: 0.2973 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:45] Epoch 1 | Step 6300 | Loss: 0.3052 | LM: 0.2973 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:52] Epoch 1 | Step 6310 | Loss: 0.3051 | LM: 0.2973 | LB: 1.0985 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:36:59] Epoch 1 | Step 6320 | Loss: 0.3051 | LM: 0.2973 | LB: 1.0985 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:06] Epoch 1 | Step 6330 | Loss: 0.3052 | LM: 0.2974 | LB: 1.0985 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:13] Epoch 1 | Step 6340 | Loss: 0.3052 | LM: 0.2975 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:20] Epoch 1 | Step 6350 | Loss: 0.3052 | LM: 0.2975 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:27] Epoch 1 | Step 6360 | Loss: 0.3052 | LM: 0.2976 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:34] Epoch 1 | Step 6370 | Loss: 0.3052 | LM: 0.2976 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.419/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:42] Epoch 1 | Step 6380 | Loss: 0.3052 | LM: 0.2975 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:49] Epoch 1 | Step 6390 | Loss: 0.3051 | LM: 0.2974 | LB: 1.0984 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:37:56] Epoch 1 | Step 6400 | Loss: 0.3051 | LM: 0.2974 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:38:03] Epoch 1 | Step 6410 | Loss: 0.3051 | LM: 0.2973 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:38:10] Epoch 1 | Step 6420 | Loss: 0.3051 | LM: 0.2972 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:38:17] Epoch 1 | Step 6430 | Loss: 0.3050 | LM: 0.2973 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:38:24] Epoch 1 | Step 6440 | Loss: 0.3051 | LM: 0.2972 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.393 | LR: 2.00e-05 +[2026-04-25 23:38:31] Epoch 1 | Step 6450 | Loss: 0.3050 | LM: 0.2971 | LB: 1.0983 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:38:38] Epoch 1 | Step 6460 | Loss: 0.3049 | LM: 0.2970 | LB: 1.0982 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:38:45] Epoch 1 | Step 6470 | Loss: 0.3049 | LM: 0.2969 | LB: 1.0982 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:38:52] Epoch 1 | Step 6480 | Loss: 0.3050 | LM: 0.2969 | LB: 1.0982 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:00] Epoch 1 | Step 6490 | Loss: 0.3050 | LM: 0.2970 | LB: 1.0982 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:07] Epoch 1 | Step 6500 | Loss: 0.3049 | LM: 0.2969 | LB: 1.0982 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:14] Epoch 1 | Step 6510 | Loss: 0.3049 | LM: 0.2969 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:21] Epoch 1 | Step 6520 | Loss: 0.3049 | LM: 0.2969 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:28] Epoch 1 | Step 6530 | Loss: 0.3048 | LM: 0.2968 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:35] Epoch 1 | Step 6540 | Loss: 0.3048 | LM: 0.2968 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:42] Epoch 1 | Step 6550 | Loss: 0.3047 | LM: 0.2966 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:49] Epoch 1 | Step 6560 | Loss: 0.3047 | LM: 0.2966 | LB: 1.0981 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:39:56] Epoch 1 | Step 6570 | Loss: 0.3047 | LM: 0.2966 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:03] Epoch 1 | Step 6580 | Loss: 0.3047 | LM: 0.2967 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:10] Epoch 1 | Step 6590 | Loss: 0.3047 | LM: 0.2967 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:18] Epoch 1 | Step 6600 | Loss: 0.3046 | LM: 0.2967 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:25] Epoch 1 | Step 6610 | Loss: 0.3046 | LM: 0.2966 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:32] Epoch 1 | Step 6620 | Loss: 0.3046 | LM: 0.2964 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:39] Epoch 1 | Step 6630 | Loss: 0.3046 | LM: 0.2963 | LB: 1.0980 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:46] Epoch 1 | Step 6640 | Loss: 0.3046 | LM: 0.2964 | LB: 1.0979 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:40:53] Epoch 1 | Step 6650 | Loss: 0.3046 | LM: 0.2964 | LB: 1.0979 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:00] Epoch 1 | Step 6660 | Loss: 0.3045 | LM: 0.2963 | LB: 1.0979 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:07] Epoch 1 | Step 6670 | Loss: 0.3045 | LM: 0.2962 | LB: 1.0979 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:14] Epoch 1 | Step 6680 | Loss: 0.3045 | LM: 0.2961 | LB: 1.0978 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:21] Epoch 1 | Step 6690 | Loss: 0.3045 | LM: 0.2961 | LB: 1.0978 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:28] Epoch 1 | Step 6700 | Loss: 0.3045 | LM: 0.2960 | LB: 1.0978 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:35] Epoch 1 | Step 6710 | Loss: 0.3044 | LM: 0.2960 | LB: 1.0978 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:42] Epoch 1 | Step 6720 | Loss: 0.3045 | LM: 0.2960 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:50] Epoch 1 | Step 6730 | Loss: 0.3045 | LM: 0.2960 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:41:57] Epoch 1 | Step 6740 | Loss: 0.3045 | LM: 0.2960 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:04] Epoch 1 | Step 6750 | Loss: 0.3044 | LM: 0.2961 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:11] Epoch 1 | Step 6760 | Loss: 0.3045 | LM: 0.2961 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:18] Epoch 1 | Step 6770 | Loss: 0.3044 | LM: 0.2961 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:25] Epoch 1 | Step 6780 | Loss: 0.3044 | LM: 0.2961 | LB: 1.0977 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:32] Epoch 1 | Step 6790 | Loss: 0.3045 | LM: 0.2961 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:40] Epoch 1 | Step 6800 | Loss: 0.3045 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:47] Epoch 1 | Step 6810 | Loss: 0.3046 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:42:54] Epoch 1 | Step 6820 | Loss: 0.3046 | LM: 0.2959 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:01] Epoch 1 | Step 6830 | Loss: 0.3047 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:08] Epoch 1 | Step 6840 | Loss: 0.3047 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:15] Epoch 1 | Step 6850 | Loss: 0.3047 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:22] Epoch 1 | Step 6860 | Loss: 0.3047 | LM: 0.2960 | LB: 1.0976 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:29] Epoch 1 | Step 6870 | Loss: 0.3047 | LM: 0.2960 | LB: 1.0975 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:36] Epoch 1 | Step 6880 | Loss: 0.3047 | LM: 0.2959 | LB: 1.0975 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:43] Epoch 1 | Step 6890 | Loss: 0.3047 | LM: 0.2959 | LB: 1.0975 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:51] Epoch 1 | Step 6900 | Loss: 0.3047 | LM: 0.2959 | LB: 1.0975 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:43:58] Epoch 1 | Step 6910 | Loss: 0.3046 | LM: 0.2958 | LB: 1.0975 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:05] Epoch 1 | Step 6920 | Loss: 0.3046 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:12] Epoch 1 | Step 6930 | Loss: 0.3046 | LM: 0.2959 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:19] Epoch 1 | Step 6940 | Loss: 0.3045 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:26] Epoch 1 | Step 6950 | Loss: 0.3045 | LM: 0.2957 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:33] Epoch 1 | Step 6960 | Loss: 0.3045 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:40] Epoch 1 | Step 6970 | Loss: 0.3045 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:47] Epoch 1 | Step 6980 | Loss: 0.3045 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:44:55] Epoch 1 | Step 6990 | Loss: 0.3044 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:45:02] Epoch 1 | Step 7000 | Loss: 0.3044 | LM: 0.2958 | LB: 1.0974 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:45:09] Epoch 1 | Step 7010 | Loss: 0.3043 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.392 | LR: 2.00e-05 +[2026-04-25 23:45:16] Epoch 1 | Step 7020 | Loss: 0.3043 | LM: 0.2959 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:23] Epoch 1 | Step 7030 | Loss: 0.3043 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:30] Epoch 1 | Step 7040 | Loss: 0.3043 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:37] Epoch 1 | Step 7050 | Loss: 0.3043 | LM: 0.2959 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:44] Epoch 1 | Step 7060 | Loss: 0.3042 | LM: 0.2959 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:51] Epoch 1 | Step 7070 | Loss: 0.3043 | LM: 0.2959 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:45:59] Epoch 1 | Step 7080 | Loss: 0.3043 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:06] Epoch 1 | Step 7090 | Loss: 0.3043 | LM: 0.2960 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:13] Epoch 1 | Step 7100 | Loss: 0.3042 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:20] Epoch 1 | Step 7110 | Loss: 0.3042 | LM: 0.2958 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:27] Epoch 1 | Step 7120 | Loss: 0.3042 | LM: 0.2957 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.418/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:34] Epoch 1 | Step 7130 | Loss: 0.3042 | LM: 0.2956 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:41] Epoch 1 | Step 7140 | Loss: 0.3042 | LM: 0.2956 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:48] Epoch 1 | Step 7150 | Loss: 0.3042 | LM: 0.2955 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:46:55] Epoch 1 | Step 7160 | Loss: 0.3041 | LM: 0.2954 | LB: 1.0973 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:02] Epoch 1 | Step 7170 | Loss: 0.3042 | LM: 0.2955 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:09] Epoch 1 | Step 7180 | Loss: 0.3042 | LM: 0.2954 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:16] Epoch 1 | Step 7190 | Loss: 0.3042 | LM: 0.2955 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:24] Epoch 1 | Step 7200 | Loss: 0.3041 | LM: 0.2955 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:31] Epoch 1 | Step 7210 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:38] Epoch 1 | Step 7220 | Loss: 0.3041 | LM: 0.2954 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:45] Epoch 1 | Step 7230 | Loss: 0.3041 | LM: 0.2954 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:52] Epoch 1 | Step 7240 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:47:59] Epoch 1 | Step 7250 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0972 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:06] Epoch 1 | Step 7260 | Loss: 0.3041 | LM: 0.2952 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:13] Epoch 1 | Step 7270 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:20] Epoch 1 | Step 7280 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:28] Epoch 1 | Step 7290 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:35] Epoch 1 | Step 7300 | Loss: 0.3041 | LM: 0.2953 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:42] Epoch 1 | Step 7310 | Loss: 0.3040 | LM: 0.2952 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:49] Epoch 1 | Step 7320 | Loss: 0.3040 | LM: 0.2953 | LB: 1.0971 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:48:56] Epoch 1 | Step 7330 | Loss: 0.3040 | LM: 0.2953 | LB: 1.0970 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:03] Epoch 1 | Step 7340 | Loss: 0.3041 | LM: 0.2954 | LB: 1.0970 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:10] Epoch 1 | Step 7350 | Loss: 0.3041 | LM: 0.2955 | LB: 1.0970 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:17] Epoch 1 | Step 7360 | Loss: 0.3040 | LM: 0.2954 | LB: 1.0970 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:24] Epoch 1 | Step 7370 | Loss: 0.3040 | LM: 0.2953 | LB: 1.0970 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:31] Epoch 1 | Step 7380 | Loss: 0.3039 | LM: 0.2952 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:38] Epoch 1 | Step 7390 | Loss: 0.3039 | LM: 0.2952 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:45] Epoch 1 | Step 7400 | Loss: 0.3039 | LM: 0.2951 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:49:52] Epoch 1 | Step 7410 | Loss: 0.3039 | LM: 0.2951 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:00] Epoch 1 | Step 7420 | Loss: 0.3039 | LM: 0.2951 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:07] Epoch 1 | Step 7430 | Loss: 0.3039 | LM: 0.2951 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:14] Epoch 1 | Step 7440 | Loss: 0.3039 | LM: 0.2951 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:21] Epoch 1 | Step 7450 | Loss: 0.3039 | LM: 0.2950 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:28] Epoch 1 | Step 7460 | Loss: 0.3039 | LM: 0.2949 | LB: 1.0969 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:35] Epoch 1 | Step 7470 | Loss: 0.3039 | LM: 0.2948 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:42] Epoch 1 | Step 7480 | Loss: 0.3039 | LM: 0.2947 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:49] Epoch 1 | Step 7490 | Loss: 0.3039 | LM: 0.2948 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:50:57] Epoch 1 | Step 7500 | Loss: 0.3039 | LM: 0.2948 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:04] Epoch 1 | Step 7510 | Loss: 0.3039 | LM: 0.2947 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:11] Epoch 1 | Step 7520 | Loss: 0.3039 | LM: 0.2947 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:18] Epoch 1 | Step 7530 | Loss: 0.3038 | LM: 0.2946 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:25] Epoch 1 | Step 7540 | Loss: 0.3038 | LM: 0.2946 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:32] Epoch 1 | Step 7550 | Loss: 0.3038 | LM: 0.2946 | LB: 1.0968 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:39] Epoch 1 | Step 7560 | Loss: 0.3038 | LM: 0.2945 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:46] Epoch 1 | Step 7570 | Loss: 0.3038 | LM: 0.2945 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:51:53] Epoch 1 | Step 7580 | Loss: 0.3037 | LM: 0.2946 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:00] Epoch 1 | Step 7590 | Loss: 0.3037 | LM: 0.2945 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:08] Epoch 1 | Step 7600 | Loss: 0.3037 | LM: 0.2946 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:15] Epoch 1 | Step 7610 | Loss: 0.3037 | LM: 0.2947 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:22] Epoch 1 | Step 7620 | Loss: 0.3036 | LM: 0.2946 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:29] Epoch 1 | Step 7630 | Loss: 0.3036 | LM: 0.2945 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:36] Epoch 1 | Step 7640 | Loss: 0.3036 | LM: 0.2944 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:43] Epoch 1 | Step 7650 | Loss: 0.3035 | LM: 0.2944 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:51] Epoch 1 | Step 7660 | Loss: 0.3035 | LM: 0.2944 | LB: 1.0967 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:52:58] Epoch 1 | Step 7670 | Loss: 0.3034 | LM: 0.2943 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:05] Epoch 1 | Step 7680 | Loss: 0.3034 | LM: 0.2943 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:12] Epoch 1 | Step 7690 | Loss: 0.3034 | LM: 0.2943 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:19] Epoch 1 | Step 7700 | Loss: 0.3034 | LM: 0.2943 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:26] Epoch 1 | Step 7710 | Loss: 0.3033 | LM: 0.2941 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:34] Epoch 1 | Step 7720 | Loss: 0.3034 | LM: 0.2941 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:41] Epoch 1 | Step 7730 | Loss: 0.3034 | LM: 0.2942 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:48] Epoch 1 | Step 7740 | Loss: 0.3035 | LM: 0.2942 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:53:55] Epoch 1 | Step 7750 | Loss: 0.3035 | LM: 0.2943 | LB: 1.0966 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:54:02] Epoch 1 | Step 7760 | Loss: 0.3034 | LM: 0.2942 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:54:09] Epoch 1 | Step 7770 | Loss: 0.3034 | LM: 0.2941 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:54:16] Epoch 1 | Step 7780 | Loss: 0.3034 | LM: 0.2941 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:54:23] Epoch 1 | Step 7790 | Loss: 0.3034 | LM: 0.2940 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:54:31] Epoch 1 | Step 7800 | Loss: 0.3033 | LM: 0.2940 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.391 | LR: 2.00e-05 +[2026-04-25 23:54:38] Epoch 1 | Step 7810 | Loss: 0.3034 | LM: 0.2940 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:54:45] Epoch 1 | Step 7820 | Loss: 0.3034 | LM: 0.2941 | LB: 1.0965 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:54:52] Epoch 1 | Step 7830 | Loss: 0.3034 | LM: 0.2941 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:54:59] Epoch 1 | Step 7840 | Loss: 0.3033 | LM: 0.2939 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:06] Epoch 1 | Step 7850 | Loss: 0.3032 | LM: 0.2938 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:13] Epoch 1 | Step 7860 | Loss: 0.3032 | LM: 0.2938 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:21] Epoch 1 | Step 7870 | Loss: 0.3032 | LM: 0.2937 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:28] Epoch 1 | Step 7880 | Loss: 0.3032 | LM: 0.2937 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:35] Epoch 1 | Step 7890 | Loss: 0.3032 | LM: 0.2937 | LB: 1.0964 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:42] Epoch 1 | Step 7900 | Loss: 0.3032 | LM: 0.2937 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:49] Epoch 1 | Step 7910 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:55:56] Epoch 1 | Step 7920 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:03] Epoch 1 | Step 7930 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:10] Epoch 1 | Step 7940 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:18] Epoch 1 | Step 7950 | Loss: 0.3033 | LM: 0.2940 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:25] Epoch 1 | Step 7960 | Loss: 0.3033 | LM: 0.2940 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:32] Epoch 1 | Step 7970 | Loss: 0.3033 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:39] Epoch 1 | Step 7980 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:46] Epoch 1 | Step 7990 | Loss: 0.3032 | LM: 0.2939 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:53] Epoch 1 | Step 8000 | Loss: 0.3032 | LM: 0.2938 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:56:54] Validation | Batch 10/42 | Loss: 0.2812 | LM_LOSS: 0.2703 | LB_LOSS: 1.0824 +[2026-04-25 23:56:55] Validation | Batch 20/42 | Loss: 0.3024 | LM_LOSS: 0.2916 | LB_LOSS: 1.0860 +[2026-04-25 23:56:56] Validation | Batch 30/42 | Loss: 0.2947 | LM_LOSS: 0.2839 | LB_LOSS: 1.0836 +[2026-04-25 23:56:58] Validation | Batch 40/42 | Loss: 0.2994 | LM_LOSS: 0.2886 | LB_LOSS: 1.0828 +[2026-04-25 23:56:58] Validation | Batch 42/42 | Loss: 0.2991 | LM_LOSS: 0.2883 | LB_LOSS: 1.0830 +[2026-04-25 23:56:58] Validation | Loss: 0.2991 | LM_LOSS: 0.2883 | LB_LOSS: 1.0830 | PPL: 1.33 | Time: 4.70s +[2026-04-25 23:57:01] New best model saved! Val loss: 0.2991 +[2026-04-25 23:57:08] Epoch 1 | Step 8010 | Loss: 0.3032 | LM: 0.2937 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:15] Epoch 1 | Step 8020 | Loss: 0.3031 | LM: 0.2937 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:22] Epoch 1 | Step 8030 | Loss: 0.3031 | LM: 0.2936 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:29] Epoch 1 | Step 8040 | Loss: 0.3031 | LM: 0.2935 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:37] Epoch 1 | Step 8050 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:44] Epoch 1 | Step 8060 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:51] Epoch 1 | Step 8070 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0963 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:57:58] Epoch 1 | Step 8080 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:05] Epoch 1 | Step 8090 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:12] Epoch 1 | Step 8100 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:19] Epoch 1 | Step 8110 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:27] Epoch 1 | Step 8120 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.417/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:34] Epoch 1 | Step 8130 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:41] Epoch 1 | Step 8140 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0962 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:48] Epoch 1 | Step 8150 | Loss: 0.3030 | LM: 0.2932 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:58:55] Epoch 1 | Step 8160 | Loss: 0.3029 | LM: 0.2932 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:02] Epoch 1 | Step 8170 | Loss: 0.3029 | LM: 0.2931 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:09] Epoch 1 | Step 8180 | Loss: 0.3029 | LM: 0.2932 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:16] Epoch 1 | Step 8190 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:24] Epoch 1 | Step 8200 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:31] Epoch 1 | Step 8210 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:38] Epoch 1 | Step 8220 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:45] Epoch 1 | Step 8230 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0961 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:52] Epoch 1 | Step 8240 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-25 23:59:59] Epoch 1 | Step 8250 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:06] Epoch 1 | Step 8260 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:13] Epoch 1 | Step 8270 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:20] Epoch 1 | Step 8280 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:28] Epoch 1 | Step 8290 | Loss: 0.3031 | LM: 0.2933 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:35] Epoch 1 | Step 8300 | Loss: 0.3030 | LM: 0.2932 | LB: 1.0960 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:42] Epoch 1 | Step 8310 | Loss: 0.3030 | LM: 0.2932 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:49] Epoch 1 | Step 8320 | Loss: 0.3030 | LM: 0.2931 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:00:56] Epoch 1 | Step 8330 | Loss: 0.3030 | LM: 0.2932 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:03] Epoch 1 | Step 8340 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:10] Epoch 1 | Step 8350 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:17] Epoch 1 | Step 8360 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:24] Epoch 1 | Step 8370 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:32] Epoch 1 | Step 8380 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:39] Epoch 1 | Step 8390 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:46] Epoch 1 | Step 8400 | Loss: 0.3029 | LM: 0.2934 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:01:53] Epoch 1 | Step 8410 | Loss: 0.3030 | LM: 0.2933 | LB: 1.0959 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:00] Epoch 1 | Step 8420 | Loss: 0.3030 | LM: 0.2934 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:07] Epoch 1 | Step 8430 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:14] Epoch 1 | Step 8440 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:21] Epoch 1 | Step 8450 | Loss: 0.3029 | LM: 0.2932 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:28] Epoch 1 | Step 8460 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:36] Epoch 1 | Step 8470 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:43] Epoch 1 | Step 8480 | Loss: 0.3029 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:50] Epoch 1 | Step 8490 | Loss: 0.3028 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:02:57] Epoch 1 | Step 8500 | Loss: 0.3028 | LM: 0.2933 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:04] Epoch 1 | Step 8510 | Loss: 0.3028 | LM: 0.2932 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:11] Epoch 1 | Step 8520 | Loss: 0.3028 | LM: 0.2931 | LB: 1.0958 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:18] Epoch 1 | Step 8530 | Loss: 0.3028 | LM: 0.2931 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:26] Epoch 1 | Step 8540 | Loss: 0.3029 | LM: 0.2932 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:33] Epoch 1 | Step 8550 | Loss: 0.3029 | LM: 0.2932 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:40] Epoch 1 | Step 8560 | Loss: 0.3029 | LM: 0.2931 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:47] Epoch 1 | Step 8570 | Loss: 0.3029 | LM: 0.2931 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:03:54] Epoch 1 | Step 8580 | Loss: 0.3028 | LM: 0.2931 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:01] Epoch 1 | Step 8590 | Loss: 0.3028 | LM: 0.2930 | LB: 1.0957 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:09] Epoch 1 | Step 8600 | Loss: 0.3027 | LM: 0.2929 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:16] Epoch 1 | Step 8610 | Loss: 0.3028 | LM: 0.2930 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:23] Epoch 1 | Step 8620 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:30] Epoch 1 | Step 8630 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:37] Epoch 1 | Step 8640 | Loss: 0.3027 | LM: 0.2929 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:44] Epoch 1 | Step 8650 | Loss: 0.3028 | LM: 0.2929 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.390 | LR: 2.00e-05 +[2026-04-26 00:04:51] Epoch 1 | Step 8660 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:04:59] Epoch 1 | Step 8670 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:06] Epoch 1 | Step 8680 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:13] Epoch 1 | Step 8690 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:20] Epoch 1 | Step 8700 | Loss: 0.3027 | LM: 0.2928 | LB: 1.0956 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:27] Epoch 1 | Step 8710 | Loss: 0.3026 | LM: 0.2927 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:34] Epoch 1 | Step 8720 | Loss: 0.3026 | LM: 0.2927 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:41] Epoch 1 | Step 8730 | Loss: 0.3026 | LM: 0.2927 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:49] Epoch 1 | Step 8740 | Loss: 0.3026 | LM: 0.2927 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:05:56] Epoch 1 | Step 8750 | Loss: 0.3026 | LM: 0.2927 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:03] Epoch 1 | Step 8760 | Loss: 0.3026 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:10] Epoch 1 | Step 8770 | Loss: 0.3025 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:17] Epoch 1 | Step 8780 | Loss: 0.3025 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:24] Epoch 1 | Step 8790 | Loss: 0.3025 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:32] Epoch 1 | Step 8800 | Loss: 0.3025 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:39] Epoch 1 | Step 8810 | Loss: 0.3024 | LM: 0.2928 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:46] Epoch 1 | Step 8820 | Loss: 0.3024 | LM: 0.2929 | LB: 1.0955 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:06:53] Epoch 1 | Step 8830 | Loss: 0.3024 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:00] Epoch 1 | Step 8840 | Loss: 0.3024 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:07] Epoch 1 | Step 8850 | Loss: 0.3024 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:15] Epoch 1 | Step 8860 | Loss: 0.3024 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:22] Epoch 1 | Step 8870 | Loss: 0.3024 | LM: 0.2930 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:29] Epoch 1 | Step 8880 | Loss: 0.3024 | LM: 0.2930 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:36] Epoch 1 | Step 8890 | Loss: 0.3023 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:43] Epoch 1 | Step 8900 | Loss: 0.3023 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:51] Epoch 1 | Step 8910 | Loss: 0.3023 | LM: 0.2930 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:07:58] Epoch 1 | Step 8920 | Loss: 0.3022 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:05] Epoch 1 | Step 8930 | Loss: 0.3022 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:12] Epoch 1 | Step 8940 | Loss: 0.3022 | LM: 0.2929 | LB: 1.0954 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:19] Epoch 1 | Step 8950 | Loss: 0.3022 | LM: 0.2928 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:26] Epoch 1 | Step 8960 | Loss: 0.3022 | LM: 0.2929 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:33] Epoch 1 | Step 8970 | Loss: 0.3022 | LM: 0.2928 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:40] Epoch 1 | Step 8980 | Loss: 0.3021 | LM: 0.2928 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:48] Epoch 1 | Step 8990 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:08:55] Epoch 1 | Step 9000 | Loss: 0.3021 | LM: 0.2928 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:02] Epoch 1 | Step 9010 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:09] Epoch 1 | Step 9020 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:16] Epoch 1 | Step 9030 | Loss: 0.3021 | LM: 0.2928 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:23] Epoch 1 | Step 9040 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:30] Epoch 1 | Step 9050 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:37] Epoch 1 | Step 9060 | Loss: 0.3021 | LM: 0.2926 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:45] Epoch 1 | Step 9070 | Loss: 0.3021 | LM: 0.2927 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:52] Epoch 1 | Step 9080 | Loss: 0.3021 | LM: 0.2926 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:09:59] Epoch 1 | Step 9090 | Loss: 0.3020 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:06] Epoch 1 | Step 9100 | Loss: 0.3021 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:13] Epoch 1 | Step 9110 | Loss: 0.3021 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:20] Epoch 1 | Step 9120 | Loss: 0.3021 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:27] Epoch 1 | Step 9130 | Loss: 0.3020 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:35] Epoch 1 | Step 9140 | Loss: 0.3020 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:42] Epoch 1 | Step 9150 | Loss: 0.3020 | LM: 0.2925 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:49] Epoch 1 | Step 9160 | Loss: 0.3020 | LM: 0.2924 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:10:56] Epoch 1 | Step 9170 | Loss: 0.3019 | LM: 0.2924 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:03] Epoch 1 | Step 9180 | Loss: 0.3019 | LM: 0.2924 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:10] Epoch 1 | Step 9190 | Loss: 0.3019 | LM: 0.2923 | LB: 1.0953 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:17] Epoch 1 | Step 9200 | Loss: 0.3018 | LM: 0.2922 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:25] Epoch 1 | Step 9210 | Loss: 0.3019 | LM: 0.2922 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:32] Epoch 1 | Step 9220 | Loss: 0.3018 | LM: 0.2922 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:39] Epoch 1 | Step 9230 | Loss: 0.3018 | LM: 0.2921 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:46] Epoch 1 | Step 9240 | Loss: 0.3018 | LM: 0.2921 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:11:53] Epoch 1 | Step 9250 | Loss: 0.3017 | LM: 0.2920 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:00] Epoch 1 | Step 9260 | Loss: 0.3017 | LM: 0.2921 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:07] Epoch 1 | Step 9270 | Loss: 0.3017 | LM: 0.2920 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:14] Epoch 1 | Step 9280 | Loss: 0.3017 | LM: 0.2920 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:21] Epoch 1 | Step 9290 | Loss: 0.3017 | LM: 0.2920 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.416/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:28] Epoch 1 | Step 9300 | Loss: 0.3017 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:36] Epoch 1 | Step 9310 | Loss: 0.3016 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:43] Epoch 1 | Step 9320 | Loss: 0.3016 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:50] Epoch 1 | Step 9330 | Loss: 0.3016 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:12:57] Epoch 1 | Step 9340 | Loss: 0.3016 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:04] Epoch 1 | Step 9350 | Loss: 0.3016 | LM: 0.2919 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:11] Epoch 1 | Step 9360 | Loss: 0.3015 | LM: 0.2918 | LB: 1.0952 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:18] Epoch 1 | Step 9370 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:25] Epoch 1 | Step 9380 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:32] Epoch 1 | Step 9390 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:40] Epoch 1 | Step 9400 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:47] Epoch 1 | Step 9410 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:13:54] Epoch 1 | Step 9420 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:01] Epoch 1 | Step 9430 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:08] Epoch 1 | Step 9440 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:16] Epoch 1 | Step 9450 | Loss: 0.3015 | LM: 0.2917 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:23] Epoch 1 | Step 9460 | Loss: 0.3015 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:30] Epoch 1 | Step 9470 | Loss: 0.3014 | LM: 0.2915 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:37] Epoch 1 | Step 9480 | Loss: 0.3014 | LM: 0.2915 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:44] Epoch 1 | Step 9490 | Loss: 0.3014 | LM: 0.2915 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:51] Epoch 1 | Step 9500 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:14:58] Epoch 1 | Step 9510 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:05] Epoch 1 | Step 9520 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:13] Epoch 1 | Step 9530 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:20] Epoch 1 | Step 9540 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0951 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:27] Epoch 1 | Step 9550 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:34] Epoch 1 | Step 9560 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:41] Epoch 1 | Step 9570 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:48] Epoch 1 | Step 9580 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:15:55] Epoch 1 | Step 9590 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:02] Epoch 1 | Step 9600 | Loss: 0.3014 | LM: 0.2916 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:09] Epoch 1 | Step 9610 | Loss: 0.3013 | LM: 0.2915 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:16] Epoch 1 | Step 9620 | Loss: 0.3013 | LM: 0.2914 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:24] Epoch 1 | Step 9630 | Loss: 0.3014 | LM: 0.2915 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:31] Epoch 1 | Step 9640 | Loss: 0.3013 | LM: 0.2914 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:38] Epoch 1 | Step 9650 | Loss: 0.3014 | LM: 0.2914 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:45] Epoch 1 | Step 9660 | Loss: 0.3013 | LM: 0.2913 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:52] Epoch 1 | Step 9670 | Loss: 0.3013 | LM: 0.2914 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:16:59] Epoch 1 | Step 9680 | Loss: 0.3013 | LM: 0.2913 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:06] Epoch 1 | Step 9690 | Loss: 0.3013 | LM: 0.2913 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:13] Epoch 1 | Step 9700 | Loss: 0.3013 | LM: 0.2913 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:20] Epoch 1 | Step 9710 | Loss: 0.3013 | LM: 0.2912 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:27] Epoch 1 | Step 9720 | Loss: 0.3013 | LM: 0.2912 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:34] Epoch 1 | Step 9730 | Loss: 0.3013 | LM: 0.2912 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:41] Epoch 1 | Step 9740 | Loss: 0.3012 | LM: 0.2912 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:49] Epoch 1 | Step 9750 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0950 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:17:56] Epoch 1 | Step 9760 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:03] Epoch 1 | Step 9770 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:10] Epoch 1 | Step 9780 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:17] Epoch 1 | Step 9790 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:24] Epoch 1 | Step 9800 | Loss: 0.3012 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:31] Epoch 1 | Step 9810 | Loss: 0.3011 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:38] Epoch 1 | Step 9820 | Loss: 0.3011 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:45] Epoch 1 | Step 9830 | Loss: 0.3011 | LM: 0.2910 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:18:53] Epoch 1 | Step 9840 | Loss: 0.3011 | LM: 0.2910 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.389 | LR: 2.00e-05 +[2026-04-26 00:19:00] Epoch 1 | Step 9850 | Loss: 0.3011 | LM: 0.2910 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.388 | LR: 2.00e-05 +[2026-04-26 00:19:07] Epoch 1 | Step 9860 | Loss: 0.3011 | LM: 0.2911 | LB: 1.0948 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.388 | LR: 2.00e-05 +[2026-04-26 00:19:14] Epoch 1 | Step 9870 | Loss: 0.3011 | LM: 0.2911 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.388 | LR: 2.00e-05 +[2026-04-26 00:19:21] Epoch 1 | Step 9880 | Loss: 0.3011 | LM: 0.2912 | LB: 1.0949 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.415/SR1: 0.388 | LR: 2.00e-05 +[2026-04-26 00:19:26] Epoch 1 completed in 7104.99s | Loss: 0.3011 | CL0: 2.8 | CL1: 2.4 +[2026-04-26 00:19:26] +Training completed! +[2026-04-26 00:19:29] Final model: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/model_final.pt +wandb: WARNING Fatal error while uploading data. Some run data will not be synced, but it will still be written to disk. Use `wandb sync` at the end of the run to try uploading. \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/requirements.txt b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f040f697230340f8a88a6e7387f7e8983d11b547 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/requirements.txt @@ -0,0 +1,245 @@ +setuptools==78.1.1 +wheel==0.45.1 +pip==25.2 +webencodings==0.5.1 +triton==3.2.0 +pytz==2025.2 +pydub==0.25.1 +pure_eval==0.2.3 +ptyprocess==0.7.0 +nvidia-ml-py==13.590.48 +nvidia-cusparselt-cu12==0.6.2 +mpmath==1.3.0 +ipython-genutils==0.2.0 +fastjsonschema==2.21.2 +brotli==1.2.0 +antlr4-python3-runtime==4.9.3 +xxhash==3.6.0 +widgetsnbextension==4.0.14 +websocket-client==1.9.0 +webcolors==24.11.1 +wcwidth==0.2.14 +urllib3==2.5.0 +uri-template==1.3.0 +tzdata==2025.2 +typing_extensions==4.15.0 +types-python-dateutil==2.9.0.20251008 +traitlets==5.14.3 +tqdm==4.67.1 +tornado==6.5.2 +tomlkit==0.13.3 +tinycss2==1.4.0 +tabulate==0.9.0 +sympy==1.13.1 +soupsieve==2.8 +sniffio==1.3.1 +smmap==5.0.2 +six==1.17.0 +shellingham==1.5.4 +Send2Trash==1.8.3 +semantic-version==2.10.0 +safetensors==0.6.2 +rpds-py==0.27.1 +rfc3986-validator==0.1.1 +regex==2025.9.18 +pyzmq==27.1.0 +PyYAML==6.0.3 +python-multipart==0.0.22 +python-json-logger==4.0.0 +python-dotenv==1.2.1 +pyparsing==3.2.5 +PyJWT==2.8.0 +Pygments==2.19.2 +pycparser==2.23 +pyarrow==22.0.0 +psutil==7.1.0 +protobuf==6.33.4 +propcache==0.4.1 +prometheus_client==0.23.1 +portalocker==3.2.0 +platformdirs==4.5.0 +pillow==11.3.0 +pexpect==4.9.0 +pathspec==1.0.4 +parso==0.8.5 +pandocfilters==1.5.1 +packaging==25.0 +orjson==3.11.6 +opt_einsum==3.4.0 +nvidia-nvtx-cu12==12.4.127 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nccl-cu12==2.21.5 +nvidia-curand-cu12==10.3.5.147 +nvidia-cufile-cu12==1.13.1.3 +nvidia-cufft-cu12==11.2.1.3 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cublas-cu12==12.4.5.8 +numpy==2.3.3 +ninja==1.13.0 +networkx==3.5 +nest-asyncio==1.6.0 +narwhals==2.15.0 +mypy_extensions==1.1.0 +multidict==6.7.0 +mistune==3.1.4 +mdurl==0.1.2 +MarkupSafe==3.0.3 +lxml==6.0.2 +librt==0.8.0 +lark==1.3.0 +kiwisolver==1.4.9 +jupyterlab_widgets==3.0.15 +jupyterlab_pygments==0.3.0 +jsonpointer==3.0.0 +json5==0.12.1 +itsdangerous==2.2.0 +idna==3.10 +hf-xet==1.1.10 +h11==0.16.0 +groovy==0.1.2 +fsspec==2025.9.0 +frozenlist==1.8.0 +fqdn==1.5.1 +fonttools==4.60.1 +filelock==3.19.1 +ffmpy==1.0.0 +executing==2.2.1 +einops==0.8.1 +dill==0.4.0 +defusedxml==0.7.1 +decorator==5.2.1 +debugpy==1.8.17 +dacite==1.9.2 +cycler==0.12.1 +comm==0.2.3 +colorama==0.4.6 +click==8.3.1 +charset-normalizer==3.4.3 +certifi==2025.10.5 +bleach==6.2.0 +babel==2.17.0 +attrs==25.4.0 +async-lru==2.0.5 +asttokens==3.0.0 +annotated-types==0.7.0 +annotated-doc==0.0.4 +aiohappyeyeballs==2.6.1 +aiofiles==24.1.0 +yarl==1.22.0 +uvicorn==0.40.0 +typing-inspection==0.4.2 +terminado==0.18.1 +stack-data==0.6.3 +sentry-sdk==2.50.0 +scipy==1.17.0 +sacrebleu==2.6.0 +rfc3987-syntax==1.1.0 +rfc3339-validator==0.1.4 +requests==2.32.5 +reportlab==4.4.9 +referencing==0.36.2 +python-dateutil==2.9.0.post0 +pydantic_core==2.41.5 +prompt_toolkit==3.0.52 +plotly==6.5.2 +pathlib2==2.3.7.post1 +orderedmultidict==1.0.2 +optree==0.17.0 +omegaconf==2.3.0 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +mypy==1.19.1 +multiprocess==0.70.16 +matplotlib-inline==0.1.7 +markdown-it-py==4.0.0 +jupyter_core==5.8.1 +Jinja2==3.1.6 +jedi==0.19.2 +ipython_pygments_lexers==1.1.1 +httpcore==1.0.9 +gitdb==4.0.12 +ftfy==6.3.1 +contourpy==1.3.3 +cffi==2.0.0 +beautifulsoup4==4.14.2 +anyio==4.11.0 +aiosignal==1.4.0 +starlette==0.50.0 +rich==14.2.0 +pydantic==2.12.5 +pandas==2.3.3 +nvidia-cusolver-cu12==11.6.1.9 +matplotlib==3.10.7 +jupyter_server_terminals==0.5.3 +jupyter_client==8.6.3 +jsonschema-specifications==2025.9.1 +ipython==9.6.0 +hydra-core==1.3.2 +huggingface-hub==0.35.3 +httpx==0.28.1 +GitPython==3.1.46 +furl==2.1.4 +cryptography==46.0.4 +arrow==1.3.0 +argon2-cffi-bindings==25.1.0 +aiohttp==3.13.1 +wandb==0.24.0 +typer==0.21.1 +torch==2.6.0 +tokenizers==0.22.1 +seaborn==0.13.2 +safehttpx==0.1.7 +jsonschema==4.25.1 +joypy==0.2.6 +isoduration==20.11.0 +ipywidgets==8.1.7 +ipykernel==6.30.1 +gradio_client==2.0.3 +fastapi==0.128.0 +Authlib==1.6.6 +argon2-cffi==25.1.0 +transformers==4.57.6 +nbformat==5.10.4 +mlstm_kernels==2.0.2 +jupyter-console==6.6.3 +gradio==6.5.1 +datasets==4.3.0 +clearml==1.16.4 +accelerate==1.10.1 +xlstm==2.0.4 +nbclient==0.10.2 +jupyter-events==0.12.0 +trackio==0.15.0 +nbconvert==7.16.6 +jupyter_server==2.17.0 +notebook_shim==0.2.4 +jupyterlab_server==2.27.3 +jupyter-lsp==2.3.0 +nbclassic==1.3.3 +jupyterlab==4.4.9 +notebook==7.4.7 +jupyter_contrib_core==0.4.2 +jupyter==1.1.1 +jupyter_nbextensions_configurator==0.6.4 +causal-conv1d==1.5.0.post8 +flash_attn==2.7.4.post1 +mamba-ssm==2.2.4 +hnet==0.0.1 +autocommand==2.2.2 +backports.tarfile==1.2.0 +importlib_metadata==8.0.0 +inflect==7.3.1 +jaraco.collections==5.1.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +more-itertools==10.3.0 +packaging==24.2 +platformdirs==4.2.2 +tomli==2.0.1 +typeguard==4.3.0 +typing_extensions==4.12.2 +wheel==0.45.1 +zipp==3.19.2 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-metadata.json b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d0fb38d543baab9895d9144be8c349b435364fb1 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-metadata.json @@ -0,0 +1 @@ +{"os": "Linux-5.4.0-176-generic-x86_64-with-glibc2.35", "python": "CPython 3.12.0", "started_at": "2026-04-25T22:20:11.717689Z", "args": ["tracking=wandb", "tracking.project=code-completion_lr-sweep", "tracking.run_name=hnet_xl_code_lr_2e-4", "training.lr=2e-4", "paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4", "data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full"], "program": "/workspace/byte-llms-code/code_completion_exp/train_hnet/train.py", "code_path": "code_completion_exp/train_hnet/train.py", "code_path_local": "train.py", "git": {"remote_url": "https://github.com/naryst/byte-llms-code.git", "commit": "0a7180b6ab9f63d2794494f09ec4918576d10fa2"}, "email": "nikita@local.ru", "root": "/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4", "host": "7504e518d24a", "executable": "/venv/bytellm/bin/python", "cpu_count": 64, "cpu_count_logical": 128, "gpu_type": "NVIDIA H100 80GB HBM3", "gpu_count": 4, "disk": {"/": {"total": "265214230528", "used": "121389543424"}}, "memory": {"total": "1081679683584"}, "gpu_nvidia": [{"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-b60cdcab-2033-2009-41de-be646c953a20"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-9982b420-4520-4238-c378-ec5a46015474"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134"}], "cuda_version": "12.2", "writer_id": "i2jx9zm2jjq81elpzo2fmxkizkbg0bw5"} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-summary.json b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..9b909eafe21b1f52fcc38c171e8501dc3f9733e3 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime": 7157, "train/loss_avg": 0.30111300496024884, "train/lm_loss": 0.22366735339164734, "train/step_time": 0.7080746650695801, "train/hard_boundary_ratio_stage0": 0.35731984922330645, "train/soft_boundary_ratio_stage1": 0.3884844363677051, "_step": 9880, "train/lb_loss": 1.1152780055999756, "train/lr": 2e-05, "train/chunk_len_stage1": 2.422819767367862, "train/loss": 0.19969405978918076, "train/epoch": 1, "train/chunk_len_stage0": 2.819989316907469, "train/hard_boundary_ratio_stage1": 0.41531218191156627, "train/soft_boundary_ratio_stage0": 0.35144078876136436, "_timestamp": 1777162766.8844316, "val/perplexity": 1.3305252412493118, "val/lb_loss": 1.0830148203032357, "val/lm_loss": 0.2882710979098365, "val/time": 4.704254865646362, "best/step": 8000, "val/loss": 0.29910124660957427, "best/val_loss": 0.29910124660957427, "best/val_perplexity": 1.3305252412493118, "epoch/time": 7104.988562345505, "epoch/soft_boundary_ratio_stage0": 0.3514468830948854, "epoch/loss": 0.30111785081837467, "epoch/hard_boundary_ratio_stage0": 0.3573285990023209, "epoch/lm_loss": 0.2911626837279241, "epoch/lb_loss": 1.0948496247683472, "epoch/chunk_len_stage0": 2.8199204649604765, "epoch/chunk_len_stage1": 2.4228836114764665, "epoch/hard_boundary_ratio_stage1": 0.41530060060195706, "epoch/soft_boundary_ratio_stage1": 0.38847202075385673} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-core.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..ff5d99708ef7ce247949aa10c127adccc0832740 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-core.log @@ -0,0 +1,16 @@ +{"time":"2026-04-25T22:20:11.804586482Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpq3353tb5/port-198705.txt","pid":198705,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-04-25T22:20:11.804980932Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":198705} +{"time":"2026-04-25T22:20:11.804989464Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-198705-198765-1964719290/socket","Net":"unix"}} +{"time":"2026-04-25T22:20:11.99240949Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-04-25T22:20:12.017529004Z","level":"INFO","msg":"handleInformInit: received","streamId":"khn25dwv","id":"1(@)"} +{"time":"2026-04-25T22:20:12.401503529Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"khn25dwv","id":"1(@)"} +{"time":"2026-04-26T00:19:30.37366386Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"khn25dwv","id":"1(@)"} +{"time":"2026-04-26T00:19:30.37416302Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"khn25dwv","id":"1(@)"} +{"time":"2026-04-26T00:19:30.385004444Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-04-26T00:19:30.385029606Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-04-26T00:19:30.38503557Z","level":"INFO","msg":"server is shutting down"} +{"time":"2026-04-26T00:19:30.385040394Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-04-26T00:19:30.385092793Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-04-26T00:19:30.385102377Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-04-26T00:19:30.385084287Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-198705-198765-1964719290/socket","Net":"unix"}} +{"time":"2026-04-26T00:19:30.385111472Z","level":"INFO","msg":"server is closed"} diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-internal.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..38ebdcd269c96a26557edde1eca568896bcb3ee0 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2026-04-25T22:20:12.017648642Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T22:20:12.401332467Z","level":"INFO","msg":"stream: created new stream","id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401402455Z","level":"INFO","msg":"handler: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401496869Z","level":"INFO","msg":"stream: started","id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.401507601Z","level":"INFO","msg":"writer: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.40151694Z","level":"INFO","msg":"sender: started","stream_id":"khn25dwv"} +{"time":"2026-04-25T22:20:12.5289538Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-25T23:20:22.74400376Z","level":"ERROR","msg":"api: HTTP error","status":403,"method":"POST","url":"https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/khn25dwv/file_stream"} +{"time":"2026-04-25T23:20:22.744078123Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 403 Forbidden url=https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/khn25dwv/file_stream: "} +{"time":"2026-04-26T00:19:30.370077692Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-26T00:19:30.370863672Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-26T00:19:30.373711643Z","level":"INFO","msg":"stream: closing","id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.373733071Z","level":"INFO","msg":"handler: closed","stream_id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.37382892Z","level":"INFO","msg":"sender: closed","stream_id":"khn25dwv"} +{"time":"2026-04-26T00:19:30.373836548Z","level":"INFO","msg":"stream: closed","id":"khn25dwv"} diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug.log b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..d2eb782480e3c71d67c702cc65d44a998e99a273 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug.log @@ -0,0 +1,24 @@ +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Configure stats pid to 198705 +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug.log +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/logs/debug-internal.log +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():844] calling init triggers +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0002, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'hnet_xl_code_lr_2e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_2e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}} +2026-04-25 22:20:11,719 INFO MainThread:198705 [wandb_init.py:init():892] starting backend +2026-04-25 22:20:11,992 INFO MainThread:198705 [wandb_init.py:init():895] sending inform_init request +2026-04-25 22:20:12,016 INFO MainThread:198705 [wandb_init.py:init():903] backend started and connected +2026-04-25 22:20:12,019 INFO MainThread:198705 [wandb_init.py:init():973] updated telemetry +2026-04-25 22:20:12,040 INFO MainThread:198705 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 22:20:12,528 INFO MainThread:198705 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 22:20:12,685 INFO MainThread:198705 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 22:20:12,686 INFO MainThread:198705 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 22:20:12,688 INFO MainThread:198705 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-26 00:19:29,581 INFO MainThread:198705 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/khn25dwv +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_restore():2476] restore +2026-04-26 00:19:29,582 INFO MainThread:198705 [wandb_run.py:_restore():2482] restore done +2026-04-26 00:19:30,373 INFO MainThread:198705 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb new file mode 100644 index 0000000000000000000000000000000000000000..f24ee0206fb787bdb1a7731f34ffd1e8a8f28bba --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17f5b1390172e14c28b32a79085198d72420d310c8a5d213678c06b1e2749a4 +size 3073785 diff --git a/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb.synced b/lr_sweep/hnet_xl_code_lr_2e-4/wandb/run-20260425_222011-khn25dwv/run-khn25dwv.wandb.synced new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/config.yaml b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..05e51b21f97b063c4fedd479c2e22e98431791d3 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/config.yaml @@ -0,0 +1,55 @@ +model: + config_path: ${oc.env:PROJECT_ROOT}/hnet_project/configs/hnet_2stage_XL_code.json + checkpoint_path: ${oc.env:PROJECT_ROOT}/hnet_project/checkpoints/hnet_2stage_XL_code.pt +training: + epochs: 1 + batch_size: 4 + eval_batch_size: 24 + gradient_accumulation_steps: 4 + lr: 0.0005 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + lr_multiplier: + - 2.0 + - 1.5 + - 1.0 + load_balancing_weight: 0.01 + load_balancing_N: 4.0 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null + warmup_model: true +data: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 0 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: hnet_xl_code_lr_5e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: ${paths.output_dir} +paths: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 +seed: 42 +device: cuda diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/hydra.yaml b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2f64c63909b51e00b52ceae726fd1d0aa9b7649e --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/hydra.yaml @@ -0,0 +1,166 @@ +hydra: + run: + dir: ${paths.output_dir} + sweep: + dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=hnet_xl_code_lr_5e-4 + - training.lr=5e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + job: + name: train + chdir: false + override_dirname: data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full,paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4,tracking.project=code-completion_lr-sweep,tracking.run_name=hnet_xl_code_lr_5e-4,tracking=wandb,training.lr=5e-4 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /workspace/byte-llms-code/code_completion_exp/train_hnet + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /workspace/byte-llms-code/code_completion_exp/train_hnet/configs + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 + choices: + paths: default + tracking: wandb + logging: default + data: default + training: default + model: hnet_xl_code + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/overrides.yaml b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5baa897bc84d66e21342afc5e3985879d18c0d27 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/.hydra/overrides.yaml @@ -0,0 +1,6 @@ +- tracking=wandb +- tracking.project=code-completion_lr-sweep +- tracking.run_name=hnet_xl_code_lr_5e-4 +- training.lr=5e-4 +- paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 +- data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/model_final.pt b/lr_sweep/hnet_xl_code_lr_5e-4/model_final.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d8f7e46279116d61b5164b129d088006a29f166 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/model_final.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca40ecd3344a7b1e8dd106b2e920a9fd77225fbebe1fc4969d809db4074cf77 +size 3315165484 diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug-internal.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..01df25355a4e1432719caaedc9d4e8f68da663d8 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2026-04-26T00:19:41.489729754Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-26T00:19:41.862602753Z","level":"INFO","msg":"stream: created new stream","id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862649982Z","level":"INFO","msg":"handler: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862747075Z","level":"INFO","msg":"stream: started","id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862757167Z","level":"INFO","msg":"writer: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862759475Z","level":"INFO","msg":"sender: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.985346468Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-26T02:18:38.932345179Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-26T02:18:39.035750391Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-26T02:18:39.038119733Z","level":"INFO","msg":"stream: closing","id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038132189Z","level":"INFO","msg":"handler: closed","stream_id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038193257Z","level":"INFO","msg":"sender: closed","stream_id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038198898Z","level":"INFO","msg":"stream: closed","id":"i6lt8av0"} diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..66c2ad2a57475850ee0c0c3117eb7cd7cd4b26e7 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/debug.log @@ -0,0 +1,24 @@ +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Configure stats pid to 257611 +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug.log +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-internal.log +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:init():844] calling init triggers +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0005, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'hnet_xl_code_lr_5e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}} +2026-04-26 00:19:41,199 INFO MainThread:257611 [wandb_init.py:init():892] starting backend +2026-04-26 00:19:41,468 INFO MainThread:257611 [wandb_init.py:init():895] sending inform_init request +2026-04-26 00:19:41,488 INFO MainThread:257611 [wandb_init.py:init():903] backend started and connected +2026-04-26 00:19:41,491 INFO MainThread:257611 [wandb_init.py:init():973] updated telemetry +2026-04-26 00:19:41,508 INFO MainThread:257611 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-26 00:19:41,984 INFO MainThread:257611 [wandb_init.py:init():1044] starting run threads in backend +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_console_start():2529] atexit reg +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-26 00:19:42,145 INFO MainThread:257611 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-26 02:18:37,753 INFO MainThread:257611 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/i6lt8av0 +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_restore():2476] restore +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_restore():2482] restore done +2026-04-26 02:18:39,037 INFO MainThread:257611 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/code/code_completion_exp/train_hnet/train.py b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/code/code_completion_exp/train_hnet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..9c7c306fe6e62d718f1815d106471a779b413a20 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/code/code_completion_exp/train_hnet/train.py @@ -0,0 +1,284 @@ +""" +Training Pipeline для HNet модели на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=hnet_small + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +from pathlib import Path + +import torch +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# HNet imports +from hnet.load_utils import load_from_pretrained, load_from_config +from hnet.utils.tokenizers import ByteTokenizer +from hnet.utils.train import group_params + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, finish_tracking +from training_lib.hnet.train_loop import train_epoch +from training_lib.hnet.data import create_dataloaders + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Глав��ая функция тренировки с поддержкой DDP чере�� Accelerate.""" + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message( + f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", + cfg, + accelerator, + ) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message( + "HNet Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator + ) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = ByteTokenizer() + + # === Model === + log_message("Loading model...", cfg, accelerator) + if cfg.model.checkpoint_path: + model = load_from_pretrained( + model_path=cfg.model.checkpoint_path, + model_config_path=cfg.model.config_path, + ) + log_message(f"Loaded pretrained: {cfg.model.checkpoint_path}", cfg, accelerator) + else: + model = load_from_config( + model_config_path=cfg.model.config_path, + device="cpu", + ) + model.init_weights() + log_message("Initialized from scratch", cfg, accelerator) + + model.train() + + # LR multiplier для разны�� стадий (до prepare!) + lr_multiplier = list(cfg.training.lr_multiplier) + model.apply_lr_multiplier(lr_multiplier) + log_message(f"Applied LR multipliers: {lr_multiplier}", cfg, accelerator) + + # Warmup для Triton kernels + if cfg.training.warmup_model: + log_message("Warming up model...", cfg, accelerator) + model = model.to(accelerator.device) + model.warmup(verbose=accelerator.is_main_process) + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message( + f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator + ) + log_message( + f"Train batches per epoch (before DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + if val_dataloader: + log_message( + f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator + ) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model) + + for group in param_groups: + if "lr" not in group: + group["lr"] = cfg.training.lr + else: + group["lr"] = cfg.training.lr * group.get("lr_multiplier", 1.0) + if "weight_decay" not in group: + group["weight_decay"] = cfg.training.weight_decay + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil(len(train_dataloader) / accelerator.num_processes) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator, + ) + + # === Accelerate Prepare === + log_message( + "Preparing model, optimizer, and dataloaders with Accelerate...", + cfg, + accelerator, + ) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = ( + accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message( + f"Train batches per epoch (after DDP split): {len(train_dataloader)}", + cfg, + accelerator, + ) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, + optimizer, + scheduler, + cfg.training.resume_checkpoint, + cfg, + accelerator, + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + accelerator.end_training() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/config.yaml b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..efe7c3314ac92dc9b8cc7766d8c1e54776c8963f --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/config.yaml @@ -0,0 +1,151 @@ +_wandb: + value: + cli_version: 0.24.0 + code_path: code/code_completion_exp/train_hnet/train.py + e: + dyau2sb185lrum72o3jj1efzj126ey29: + args: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=hnet_xl_code_lr_5e-4 + - training.lr=5e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + codePath: code_completion_exp/train_hnet/train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.2" + disk: + /: + total: "265214230528" + used: "170465628160" + email: nikita@local.ru + executable: /venv/bytellm/bin/python + git: + commit: 0a7180b6ab9f63d2794494f09ec4918576d10fa2 + remote: https://github.com/naryst/byte-llms-code.git + gpu: NVIDIA H100 80GB HBM3 + gpu_count: 4 + gpu_nvidia: + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-b60cdcab-2033-2009-41de-be646c953a20 + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-9982b420-4520-4238-c378-ec5a46015474 + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134 + host: 7504e518d24a + memory: + total: "1081679683584" + os: Linux-5.4.0-176-generic-x86_64-with-glibc2.35 + program: /workspace/byte-llms-code/code_completion_exp/train_hnet/train.py + python: CPython 3.12.0 + root: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 + startedAt: "2026-04-26T00:19:41.197293Z" + writerId: dyau2sb185lrum72o3jj1efzj126ey29 + m: [] + python_version: 3.12.0 + t: + "1": + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + "2": + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + "3": + - 2 + - 13 + - 16 + - 61 + "4": 3.12.0 + "5": 0.24.0 + "6": 4.57.6 + "12": 0.24.0 + "13": linux-x86_64 +data: + value: + max_context_len: 4096 + max_target_len: 256 + max_train_samples: null + max_val_samples: 2000 + num_workers: 0 + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + pin_memory: true +device: + value: cuda +logging: + value: + eval_interval: 2000 + log_interval: 10 + save_every_epoch: false + save_interval: 0 +model: + value: + checkpoint_path: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt + config_path: /workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json +paths: + value: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 +seed: + value: 42 +tracking: + value: + backend: wandb + base_url: https://wandb.platun0v.ru + enabled: true + entity: null + local_dir: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4 + project: code-completion_lr-sweep + run_name: hnet_xl_code_lr_5e-4 +training: + value: + batch_size: 4 + betas: + - 0.9 + - 0.95 + decay_ratio: 0.2 + epochs: 1 + eps: 1e-08 + eval_batch_size: 24 + gradient_accumulation_steps: 4 + load_balancing_N: 4 + load_balancing_weight: 0.01 + lr: 0.0005 + lr_multiplier: + - 2 + - 1.5 + - 1 + lr_scheduler: wsd + max_grad_norm: 1 + min_lr_ratio: 0.1 + resume: false + resume_checkpoint: null + use_amp: true + warmup_model: true + warmup_ratio: 0.1 + warmup_steps: 100 + weight_decay: 0.1 diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/output.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..ee70961f7a1447a97d14d968dee2fec9d40baa02 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/output.log @@ -0,0 +1,1044 @@ +[2026-04-26 00:19:42] Initializing tokenizer... +[2026-04-26 00:19:42] Loading model... +[2026-04-26 00:19:46] Loaded pretrained: /workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt +[2026-04-26 00:19:46] Applied LR multipliers: [2.0, 1.5, 1.0] +[2026-04-26 00:19:46] Warming up model... +[WARMUP] Starting warmup (compiling Triton kernels)... +[WARMUP] Forward: 17.258s, Backward: 26.449s +[WARMUP] Warmup complete. Subsequent passes will be fast. +[2026-04-26 00:20:30] Total params: 1,654,090,112 +[2026-04-26 00:20:30] Trainable params: 1,654,090,112 +[2026-04-26 00:20:30] Creating dataloaders... +[2026-04-26 00:20:30] Train dataset size: 316397 +[2026-04-26 00:20:30] Train batches per epoch (before DDP split): 79100 +[2026-04-26 00:20:30] Validation dataset size: 2000 +[2026-04-26 00:20:30] Validation batches: 84 +[2026-04-26 00:20:30] Creating optimizer... +[2026-04-26 00:20:30] Total steps: 9887, Steps per epoch: 39550 +[2026-04-26 00:20:30] Preparing model, optimizer, and dataloaders with Accelerate... +[2026-04-26 00:20:31] Train batches per epoch (after DDP split): 39550 +[2026-04-26 00:20:31] Starting training... +[2026-04-26 00:20:31] +============================================================ +[2026-04-26 00:20:31] EPOCH 1/1 +[2026-04-26 00:20:31] ============================================================ +[2026-04-26 00:21:00] Epoch 1 | Step 10 | Loss: 0.4872 | LM: 0.4905 | LB: 1.1588 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.477/SR1: 0.456 | LR: 5.91e-05 +[2026-04-26 00:21:07] Epoch 1 | Step 20 | Loss: 0.4350 | LM: 0.4343 | LB: 1.1587 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.478/SR1: 0.458 | LR: 6.82e-05 +[2026-04-26 00:21:14] Epoch 1 | Step 30 | Loss: 0.4059 | LM: 0.3807 | LB: 1.1566 | CL0: 2.9 | CL1: 2.1 | HR0: 0.352/SR0: 0.351 | HR1: 0.477/SR1: 0.455 | LR: 7.73e-05 +[2026-04-26 00:21:21] Epoch 1 | Step 40 | Loss: 0.3922 | LM: 0.3709 | LB: 1.1645 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.481/SR1: 0.459 | LR: 8.64e-05 +[2026-04-26 00:21:28] Epoch 1 | Step 50 | Loss: 0.3724 | LM: 0.3462 | LB: 1.1630 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.458 | LR: 9.55e-05 +[2026-04-26 00:21:36] Epoch 1 | Step 60 | Loss: 0.3575 | LM: 0.3315 | LB: 1.1649 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.481/SR1: 0.461 | LR: 1.05e-04 +[2026-04-26 00:21:43] Epoch 1 | Step 70 | Loss: 0.3491 | LM: 0.3283 | LB: 1.1640 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.480/SR1: 0.459 | LR: 1.14e-04 +[2026-04-26 00:21:50] Epoch 1 | Step 80 | Loss: 0.3458 | LM: 0.3333 | LB: 1.1648 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 1.23e-04 +[2026-04-26 00:21:57] Epoch 1 | Step 90 | Loss: 0.3442 | LM: 0.3325 | LB: 1.1649 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.481/SR1: 0.459 | LR: 1.32e-04 +[2026-04-26 00:22:04] Epoch 1 | Step 100 | Loss: 0.3400 | LM: 0.3378 | LB: 1.1641 | CL0: 2.8 | CL1: 2.1 | HR0: 0.360/SR0: 0.357 | HR1: 0.479/SR1: 0.458 | LR: 1.41e-04 +[2026-04-26 00:22:11] Epoch 1 | Step 110 | Loss: 0.3399 | LM: 0.3444 | LB: 1.1644 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.480/SR1: 0.458 | LR: 1.50e-04 +[2026-04-26 00:22:18] Epoch 1 | Step 120 | Loss: 0.3394 | LM: 0.3361 | LB: 1.1629 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.479/SR1: 0.457 | LR: 1.59e-04 +[2026-04-26 00:22:25] Epoch 1 | Step 130 | Loss: 0.3381 | LM: 0.3329 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.479/SR1: 0.457 | LR: 1.68e-04 +[2026-04-26 00:22:33] Epoch 1 | Step 140 | Loss: 0.3372 | LM: 0.3354 | LB: 1.1621 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.478/SR1: 0.457 | LR: 1.78e-04 +[2026-04-26 00:22:40] Epoch 1 | Step 150 | Loss: 0.3331 | LM: 0.3255 | LB: 1.1612 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.477/SR1: 0.456 | LR: 1.87e-04 +[2026-04-26 00:22:47] Epoch 1 | Step 160 | Loss: 0.3306 | LM: 0.3237 | LB: 1.1614 | CL0: 2.8 | CL1: 2.1 | HR0: 0.359/SR0: 0.357 | HR1: 0.477/SR1: 0.456 | LR: 1.96e-04 +[2026-04-26 00:22:54] Epoch 1 | Step 170 | Loss: 0.3322 | LM: 0.3314 | LB: 1.1588 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.475/SR1: 0.454 | LR: 2.05e-04 +[2026-04-26 00:23:01] Epoch 1 | Step 180 | Loss: 0.3292 | LM: 0.3287 | LB: 1.1574 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.474/SR1: 0.453 | LR: 2.14e-04 +[2026-04-26 00:23:08] Epoch 1 | Step 190 | Loss: 0.3294 | LM: 0.3288 | LB: 1.1557 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.472/SR1: 0.451 | LR: 2.23e-04 +[2026-04-26 00:23:15] Epoch 1 | Step 200 | Loss: 0.3293 | LM: 0.3336 | LB: 1.1543 | CL0: 2.8 | CL1: 2.1 | HR0: 0.358/SR0: 0.356 | HR1: 0.471/SR1: 0.451 | LR: 2.32e-04 +[2026-04-26 00:23:23] Epoch 1 | Step 210 | Loss: 0.3305 | LM: 0.3372 | LB: 1.1534 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.471/SR1: 0.450 | LR: 2.41e-04 +[2026-04-26 00:23:30] Epoch 1 | Step 220 | Loss: 0.3301 | LM: 0.3393 | LB: 1.1528 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.355 | HR1: 0.470/SR1: 0.450 | LR: 2.50e-04 +[2026-04-26 00:23:37] Epoch 1 | Step 230 | Loss: 0.3279 | LM: 0.3343 | LB: 1.1522 | CL0: 2.8 | CL1: 2.1 | HR0: 0.357/SR0: 0.356 | HR1: 0.469/SR1: 0.449 | LR: 2.60e-04 +[2026-04-26 00:23:44] Epoch 1 | Step 240 | Loss: 0.3277 | LM: 0.3314 | LB: 1.1507 | CL0: 2.8 | CL1: 2.1 | HR0: 0.356/SR0: 0.355 | HR1: 0.468/SR1: 0.448 | LR: 2.69e-04 +[2026-04-26 00:23:51] Epoch 1 | Step 250 | Loss: 0.3271 | LM: 0.3279 | LB: 1.1498 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.355 | HR1: 0.468/SR1: 0.448 | LR: 2.78e-04 +[2026-04-26 00:23:58] Epoch 1 | Step 260 | Loss: 0.3278 | LM: 0.3311 | LB: 1.1485 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.467/SR1: 0.447 | LR: 2.87e-04 +[2026-04-26 00:24:05] Epoch 1 | Step 270 | Loss: 0.3270 | LM: 0.3328 | LB: 1.1468 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.354 | HR1: 0.465/SR1: 0.445 | LR: 2.96e-04 +[2026-04-26 00:24:12] Epoch 1 | Step 280 | Loss: 0.3258 | LM: 0.3361 | LB: 1.1461 | CL0: 2.8 | CL1: 2.2 | HR0: 0.356/SR0: 0.355 | HR1: 0.464/SR1: 0.445 | LR: 3.05e-04 +[2026-04-26 00:24:20] Epoch 1 | Step 290 | Loss: 0.3252 | LM: 0.3363 | LB: 1.1451 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.462/SR1: 0.443 | LR: 3.14e-04 +[2026-04-26 00:24:27] Epoch 1 | Step 300 | Loss: 0.3259 | LM: 0.3370 | LB: 1.1442 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.461/SR1: 0.442 | LR: 3.23e-04 +[2026-04-26 00:24:34] Epoch 1 | Step 310 | Loss: 0.3258 | LM: 0.3361 | LB: 1.1425 | CL0: 2.8 | CL1: 2.2 | HR0: 0.357/SR0: 0.355 | HR1: 0.459/SR1: 0.441 | LR: 3.32e-04 +[2026-04-26 00:24:41] Epoch 1 | Step 320 | Loss: 0.3250 | LM: 0.3355 | LB: 1.1414 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.458/SR1: 0.439 | LR: 3.41e-04 +[2026-04-26 00:24:48] Epoch 1 | Step 330 | Loss: 0.3249 | LM: 0.3361 | LB: 1.1402 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.457/SR1: 0.438 | LR: 3.51e-04 +[2026-04-26 00:24:55] Epoch 1 | Step 340 | Loss: 0.3247 | LM: 0.3357 | LB: 1.1389 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.455/SR1: 0.437 | LR: 3.60e-04 +[2026-04-26 00:25:02] Epoch 1 | Step 350 | Loss: 0.3248 | LM: 0.3359 | LB: 1.1379 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.454/SR1: 0.436 | LR: 3.69e-04 +[2026-04-26 00:25:09] Epoch 1 | Step 360 | Loss: 0.3242 | LM: 0.3341 | LB: 1.1368 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.452/SR1: 0.435 | LR: 3.78e-04 +[2026-04-26 00:25:17] Epoch 1 | Step 370 | Loss: 0.3235 | LM: 0.3337 | LB: 1.1354 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.451/SR1: 0.434 | LR: 3.87e-04 +[2026-04-26 00:25:24] Epoch 1 | Step 380 | Loss: 0.3229 | LM: 0.3339 | LB: 1.1345 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.450/SR1: 0.433 | LR: 3.96e-04 +[2026-04-26 00:25:31] Epoch 1 | Step 390 | Loss: 0.3233 | LM: 0.3340 | LB: 1.1340 | CL0: 2.8 | CL1: 2.2 | HR0: 0.358/SR0: 0.356 | HR1: 0.449/SR1: 0.432 | LR: 4.05e-04 +[2026-04-26 00:25:38] Epoch 1 | Step 400 | Loss: 0.3240 | LM: 0.3350 | LB: 1.1328 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.448/SR1: 0.431 | LR: 4.14e-04 +[2026-04-26 00:25:45] Epoch 1 | Step 410 | Loss: 0.3244 | LM: 0.3360 | LB: 1.1318 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.447/SR1: 0.430 | LR: 4.23e-04 +[2026-04-26 00:25:52] Epoch 1 | Step 420 | Loss: 0.3246 | LM: 0.3367 | LB: 1.1312 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.430 | LR: 4.33e-04 +[2026-04-26 00:25:59] Epoch 1 | Step 430 | Loss: 0.3260 | LM: 0.3363 | LB: 1.1307 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.446/SR1: 0.430 | LR: 4.42e-04 +[2026-04-26 00:26:06] Epoch 1 | Step 440 | Loss: 0.3252 | LM: 0.3360 | LB: 1.1301 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.445/SR1: 0.429 | LR: 4.51e-04 +[2026-04-26 00:26:13] Epoch 1 | Step 450 | Loss: 0.3258 | LM: 0.3369 | LB: 1.1296 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.445/SR1: 0.429 | LR: 4.60e-04 +[2026-04-26 00:26:21] Epoch 1 | Step 460 | Loss: 0.3265 | LM: 0.3366 | LB: 1.1292 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.444/SR1: 0.429 | LR: 4.69e-04 +[2026-04-26 00:26:28] Epoch 1 | Step 470 | Loss: 0.3265 | LM: 0.3365 | LB: 1.1284 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.356 | HR1: 0.443/SR1: 0.428 | LR: 4.78e-04 +[2026-04-26 00:26:35] Epoch 1 | Step 480 | Loss: 0.3268 | LM: 0.3363 | LB: 1.1277 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.443/SR1: 0.427 | LR: 4.87e-04 +[2026-04-26 00:26:42] Epoch 1 | Step 490 | Loss: 0.3270 | LM: 0.3361 | LB: 1.1271 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.442/SR1: 0.427 | LR: 4.96e-04 +[2026-04-26 00:26:49] Epoch 1 | Step 500 | Loss: 0.3277 | LM: 0.3348 | LB: 1.1264 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.441/SR1: 0.426 | LR: 5.00e-04 +[2026-04-26 00:26:56] Epoch 1 | Step 510 | Loss: 0.3277 | LM: 0.3334 | LB: 1.1257 | CL0: 2.8 | CL1: 2.3 | HR0: 0.358/SR0: 0.355 | HR1: 0.440/SR1: 0.425 | LR: 5.00e-04 +[2026-04-26 00:27:03] Epoch 1 | Step 520 | Loss: 0.3288 | LM: 0.3349 | LB: 1.1248 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.440/SR1: 0.424 | LR: 5.00e-04 +[2026-04-26 00:27:10] Epoch 1 | Step 530 | Loss: 0.3292 | LM: 0.3355 | LB: 1.1240 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.439/SR1: 0.424 | LR: 5.00e-04 +[2026-04-26 00:27:17] Epoch 1 | Step 540 | Loss: 0.3291 | LM: 0.3360 | LB: 1.1234 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.438/SR1: 0.423 | LR: 5.00e-04 +[2026-04-26 00:27:24] Epoch 1 | Step 550 | Loss: 0.3291 | LM: 0.3356 | LB: 1.1229 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.438/SR1: 0.423 | LR: 5.00e-04 +[2026-04-26 00:27:31] Epoch 1 | Step 560 | Loss: 0.3296 | LM: 0.3344 | LB: 1.1226 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.437/SR1: 0.423 | LR: 5.00e-04 +[2026-04-26 00:27:39] Epoch 1 | Step 570 | Loss: 0.3303 | LM: 0.3355 | LB: 1.1226 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.355 | HR1: 0.437/SR1: 0.423 | LR: 5.00e-04 +[2026-04-26 00:27:46] Epoch 1 | Step 580 | Loss: 0.3310 | LM: 0.3361 | LB: 1.1224 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.423 | LR: 5.00e-04 +[2026-04-26 00:27:53] Epoch 1 | Step 590 | Loss: 0.3319 | LM: 0.3367 | LB: 1.1221 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.422 | LR: 5.00e-04 +[2026-04-26 00:28:00] Epoch 1 | Step 600 | Loss: 0.3322 | LM: 0.3359 | LB: 1.1217 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.422 | LR: 5.00e-04 +[2026-04-26 00:28:07] Epoch 1 | Step 610 | Loss: 0.3332 | LM: 0.3360 | LB: 1.1215 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.437/SR1: 0.422 | LR: 5.00e-04 +[2026-04-26 00:28:14] Epoch 1 | Step 620 | Loss: 0.3341 | LM: 0.3366 | LB: 1.1213 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.422 | LR: 5.00e-04 +[2026-04-26 00:28:22] Epoch 1 | Step 630 | Loss: 0.3346 | LM: 0.3385 | LB: 1.1210 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.422 | LR: 5.00e-04 +[2026-04-26 00:28:29] Epoch 1 | Step 640 | Loss: 0.3356 | LM: 0.3384 | LB: 1.1206 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.354 | HR1: 0.436/SR1: 0.421 | LR: 5.00e-04 +[2026-04-26 00:28:36] Epoch 1 | Step 650 | Loss: 0.3358 | LM: 0.3386 | LB: 1.1201 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.435/SR1: 0.421 | LR: 5.00e-04 +[2026-04-26 00:28:43] Epoch 1 | Step 660 | Loss: 0.3366 | LM: 0.3389 | LB: 1.1195 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.435/SR1: 0.420 | LR: 5.00e-04 +[2026-04-26 00:28:50] Epoch 1 | Step 670 | Loss: 0.3370 | LM: 0.3395 | LB: 1.1190 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.434/SR1: 0.420 | LR: 5.00e-04 +[2026-04-26 00:28:57] Epoch 1 | Step 680 | Loss: 0.3377 | LM: 0.3406 | LB: 1.1189 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.434/SR1: 0.420 | LR: 5.00e-04 +[2026-04-26 00:29:04] Epoch 1 | Step 690 | Loss: 0.3381 | LM: 0.3418 | LB: 1.1184 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.434/SR1: 0.419 | LR: 5.00e-04 +[2026-04-26 00:29:11] Epoch 1 | Step 700 | Loss: 0.3390 | LM: 0.3443 | LB: 1.1181 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.433/SR1: 0.419 | LR: 5.00e-04 +[2026-04-26 00:29:19] Epoch 1 | Step 710 | Loss: 0.3392 | LM: 0.3454 | LB: 1.1177 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.433/SR1: 0.419 | LR: 5.00e-04 +[2026-04-26 00:29:26] Epoch 1 | Step 720 | Loss: 0.3398 | LM: 0.3464 | LB: 1.1172 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.432/SR1: 0.418 | LR: 5.00e-04 +[2026-04-26 00:29:33] Epoch 1 | Step 730 | Loss: 0.3403 | LM: 0.3466 | LB: 1.1169 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.432/SR1: 0.418 | LR: 5.00e-04 +[2026-04-26 00:29:40] Epoch 1 | Step 740 | Loss: 0.3406 | LM: 0.3469 | LB: 1.1164 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.431/SR1: 0.417 | LR: 5.00e-04 +[2026-04-26 00:29:47] Epoch 1 | Step 750 | Loss: 0.3409 | LM: 0.3468 | LB: 1.1158 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.431/SR1: 0.417 | LR: 5.00e-04 +[2026-04-26 00:29:54] Epoch 1 | Step 760 | Loss: 0.3418 | LM: 0.3468 | LB: 1.1153 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.430/SR1: 0.416 | LR: 5.00e-04 +[2026-04-26 00:30:01] Epoch 1 | Step 770 | Loss: 0.3429 | LM: 0.3477 | LB: 1.1149 | CL0: 2.8 | CL1: 2.3 | HR0: 0.357/SR0: 0.353 | HR1: 0.430/SR1: 0.416 | LR: 5.00e-04 +[2026-04-26 00:30:08] Epoch 1 | Step 780 | Loss: 0.3433 | LM: 0.3477 | LB: 1.1145 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.429/SR1: 0.416 | LR: 5.00e-04 +[2026-04-26 00:30:15] Epoch 1 | Step 790 | Loss: 0.3436 | LM: 0.3482 | LB: 1.1142 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.429/SR1: 0.415 | LR: 5.00e-04 +[2026-04-26 00:30:22] Epoch 1 | Step 800 | Loss: 0.3434 | LM: 0.3474 | LB: 1.1138 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.429/SR1: 0.415 | LR: 5.00e-04 +[2026-04-26 00:30:30] Epoch 1 | Step 810 | Loss: 0.3437 | LM: 0.3483 | LB: 1.1135 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.428/SR1: 0.415 | LR: 5.00e-04 +[2026-04-26 00:30:37] Epoch 1 | Step 820 | Loss: 0.3440 | LM: 0.3481 | LB: 1.1132 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.428/SR1: 0.415 | LR: 5.00e-04 +[2026-04-26 00:30:44] Epoch 1 | Step 830 | Loss: 0.3447 | LM: 0.3494 | LB: 1.1127 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.428/SR1: 0.414 | LR: 5.00e-04 +[2026-04-26 00:30:51] Epoch 1 | Step 840 | Loss: 0.3450 | LM: 0.3488 | LB: 1.1124 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.427/SR1: 0.414 | LR: 5.00e-04 +[2026-04-26 00:30:58] Epoch 1 | Step 850 | Loss: 0.3448 | LM: 0.3490 | LB: 1.1120 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.427/SR1: 0.413 | LR: 5.00e-04 +[2026-04-26 00:31:05] Epoch 1 | Step 860 | Loss: 0.3454 | LM: 0.3487 | LB: 1.1116 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.426/SR1: 0.413 | LR: 5.00e-04 +[2026-04-26 00:31:12] Epoch 1 | Step 870 | Loss: 0.3464 | LM: 0.3495 | LB: 1.1113 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.426/SR1: 0.412 | LR: 5.00e-04 +[2026-04-26 00:31:19] Epoch 1 | Step 880 | Loss: 0.3471 | LM: 0.3501 | LB: 1.1110 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.425/SR1: 0.412 | LR: 5.00e-04 +[2026-04-26 00:31:26] Epoch 1 | Step 890 | Loss: 0.3477 | LM: 0.3503 | LB: 1.1107 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.352 | HR1: 0.425/SR1: 0.412 | LR: 5.00e-04 +[2026-04-26 00:31:34] Epoch 1 | Step 900 | Loss: 0.3481 | LM: 0.3501 | LB: 1.1105 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.425/SR1: 0.411 | LR: 5.00e-04 +[2026-04-26 00:31:41] Epoch 1 | Step 910 | Loss: 0.3487 | LM: 0.3504 | LB: 1.1101 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.424/SR1: 0.411 | LR: 5.00e-04 +[2026-04-26 00:31:48] Epoch 1 | Step 920 | Loss: 0.3496 | LM: 0.3510 | LB: 1.1099 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.424/SR1: 0.411 | LR: 5.00e-04 +[2026-04-26 00:31:55] Epoch 1 | Step 930 | Loss: 0.3500 | LM: 0.3509 | LB: 1.1096 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:02] Epoch 1 | Step 940 | Loss: 0.3505 | LM: 0.3507 | LB: 1.1093 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:09] Epoch 1 | Step 950 | Loss: 0.3504 | LM: 0.3510 | LB: 1.1092 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:16] Epoch 1 | Step 960 | Loss: 0.3504 | LM: 0.3521 | LB: 1.1091 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:23] Epoch 1 | Step 970 | Loss: 0.3507 | LM: 0.3529 | LB: 1.1089 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:30] Epoch 1 | Step 980 | Loss: 0.3506 | LM: 0.3521 | LB: 1.1087 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:38] Epoch 1 | Step 990 | Loss: 0.3506 | LM: 0.3526 | LB: 1.1086 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.423/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:45] Epoch 1 | Step 1000 | Loss: 0.3507 | LM: 0.3520 | LB: 1.1084 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.422/SR1: 0.410 | LR: 5.00e-04 +[2026-04-26 00:32:52] Epoch 1 | Step 1010 | Loss: 0.3513 | LM: 0.3529 | LB: 1.1082 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.352 | HR1: 0.422/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:32:59] Epoch 1 | Step 1020 | Loss: 0.3515 | LM: 0.3534 | LB: 1.1080 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.422/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:33:06] Epoch 1 | Step 1030 | Loss: 0.3520 | LM: 0.3534 | LB: 1.1077 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.351 | HR1: 0.422/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:33:13] Epoch 1 | Step 1040 | Loss: 0.3518 | LM: 0.3531 | LB: 1.1075 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.351 | HR1: 0.422/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:33:20] Epoch 1 | Step 1050 | Loss: 0.3519 | LM: 0.3533 | LB: 1.1074 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:33:27] Epoch 1 | Step 1060 | Loss: 0.3517 | LM: 0.3531 | LB: 1.1072 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.409 | LR: 5.00e-04 +[2026-04-26 00:33:34] Epoch 1 | Step 1070 | Loss: 0.3517 | LM: 0.3527 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:33:41] Epoch 1 | Step 1080 | Loss: 0.3524 | LM: 0.3523 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:33:48] Epoch 1 | Step 1090 | Loss: 0.3532 | LM: 0.3533 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:33:56] Epoch 1 | Step 1100 | Loss: 0.3534 | LM: 0.3538 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:03] Epoch 1 | Step 1110 | Loss: 0.3537 | LM: 0.3533 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:10] Epoch 1 | Step 1120 | Loss: 0.3542 | LM: 0.3534 | LB: 1.1070 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:17] Epoch 1 | Step 1130 | Loss: 0.3548 | LM: 0.3529 | LB: 1.1069 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:24] Epoch 1 | Step 1140 | Loss: 0.3551 | LM: 0.3531 | LB: 1.1067 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.421/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:31] Epoch 1 | Step 1150 | Loss: 0.3549 | LM: 0.3527 | LB: 1.1065 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.420/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:38] Epoch 1 | Step 1160 | Loss: 0.3555 | LM: 0.3532 | LB: 1.1064 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.420/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:46] Epoch 1 | Step 1170 | Loss: 0.3560 | LM: 0.3530 | LB: 1.1062 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.420/SR1: 0.408 | LR: 5.00e-04 +[2026-04-26 00:34:53] Epoch 1 | Step 1180 | Loss: 0.3562 | LM: 0.3534 | LB: 1.1059 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.420/SR1: 0.407 | LR: 5.00e-04 +[2026-04-26 00:35:00] Epoch 1 | Step 1190 | Loss: 0.3565 | LM: 0.3538 | LB: 1.1057 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.420/SR1: 0.407 | LR: 5.00e-04 +[2026-04-26 00:35:07] Epoch 1 | Step 1200 | Loss: 0.3566 | LM: 0.3535 | LB: 1.1055 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.419/SR1: 0.407 | LR: 5.00e-04 +[2026-04-26 00:35:14] Epoch 1 | Step 1210 | Loss: 0.3568 | LM: 0.3542 | LB: 1.1054 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.419/SR1: 0.407 | LR: 5.00e-04 +[2026-04-26 00:35:21] Epoch 1 | Step 1220 | Loss: 0.3566 | LM: 0.3534 | LB: 1.1052 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.419/SR1: 0.407 | LR: 5.00e-04 +[2026-04-26 00:35:28] Epoch 1 | Step 1230 | Loss: 0.3570 | LM: 0.3542 | LB: 1.1049 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.419/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:35:35] Epoch 1 | Step 1240 | Loss: 0.3574 | LM: 0.3545 | LB: 1.1047 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:35:43] Epoch 1 | Step 1250 | Loss: 0.3574 | LM: 0.3550 | LB: 1.1045 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:35:50] Epoch 1 | Step 1260 | Loss: 0.3574 | LM: 0.3551 | LB: 1.1043 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:35:57] Epoch 1 | Step 1270 | Loss: 0.3573 | LM: 0.3548 | LB: 1.1042 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.351 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:36:04] Epoch 1 | Step 1280 | Loss: 0.3575 | LM: 0.3547 | LB: 1.1041 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:36:11] Epoch 1 | Step 1290 | Loss: 0.3578 | LM: 0.3551 | LB: 1.1040 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:36:18] Epoch 1 | Step 1300 | Loss: 0.3578 | LM: 0.3553 | LB: 1.1040 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.418/SR1: 0.406 | LR: 5.00e-04 +[2026-04-26 00:36:25] Epoch 1 | Step 1310 | Loss: 0.3580 | LM: 0.3551 | LB: 1.1038 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:36:33] Epoch 1 | Step 1320 | Loss: 0.3584 | LM: 0.3553 | LB: 1.1036 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:36:40] Epoch 1 | Step 1330 | Loss: 0.3585 | LM: 0.3553 | LB: 1.1034 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:36:47] Epoch 1 | Step 1340 | Loss: 0.3587 | LM: 0.3555 | LB: 1.1032 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:36:54] Epoch 1 | Step 1350 | Loss: 0.3590 | LM: 0.3562 | LB: 1.1033 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:37:01] Epoch 1 | Step 1360 | Loss: 0.3592 | LM: 0.3559 | LB: 1.1031 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.417/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:37:08] Epoch 1 | Step 1370 | Loss: 0.3593 | LM: 0.3559 | LB: 1.1030 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.416/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:37:15] Epoch 1 | Step 1380 | Loss: 0.3598 | LM: 0.3559 | LB: 1.1028 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.416/SR1: 0.405 | LR: 5.00e-04 +[2026-04-26 00:37:22] Epoch 1 | Step 1390 | Loss: 0.3605 | LM: 0.3565 | LB: 1.1025 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.416/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:37:30] Epoch 1 | Step 1400 | Loss: 0.3606 | LM: 0.3563 | LB: 1.1023 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.350 | HR1: 0.416/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:37:37] Epoch 1 | Step 1410 | Loss: 0.3605 | LM: 0.3554 | LB: 1.1021 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.350 | HR1: 0.416/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:37:44] Epoch 1 | Step 1420 | Loss: 0.3607 | LM: 0.3556 | LB: 1.1020 | CL0: 2.8 | CL1: 2.4 | HR0: 0.357/SR0: 0.349 | HR1: 0.416/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:37:51] Epoch 1 | Step 1430 | Loss: 0.3608 | LM: 0.3559 | LB: 1.1019 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:37:58] Epoch 1 | Step 1440 | Loss: 0.3608 | LM: 0.3559 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:38:05] Epoch 1 | Step 1450 | Loss: 0.3609 | LM: 0.3557 | LB: 1.1016 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:38:12] Epoch 1 | Step 1460 | Loss: 0.3608 | LM: 0.3556 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:38:20] Epoch 1 | Step 1470 | Loss: 0.3612 | LM: 0.3561 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:38:27] Epoch 1 | Step 1480 | Loss: 0.3612 | LM: 0.3557 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:38:34] Epoch 1 | Step 1490 | Loss: 0.3614 | LM: 0.3560 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:38:41] Epoch 1 | Step 1500 | Loss: 0.3616 | LM: 0.3562 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:38:48] Epoch 1 | Step 1510 | Loss: 0.3618 | LM: 0.3566 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:38:55] Epoch 1 | Step 1520 | Loss: 0.3620 | LM: 0.3565 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:02] Epoch 1 | Step 1530 | Loss: 0.3621 | LM: 0.3564 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:09] Epoch 1 | Step 1540 | Loss: 0.3624 | LM: 0.3572 | LB: 1.1017 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:16] Epoch 1 | Step 1550 | Loss: 0.3626 | LM: 0.3572 | LB: 1.1016 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:23] Epoch 1 | Step 1560 | Loss: 0.3626 | LM: 0.3571 | LB: 1.1015 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:30] Epoch 1 | Step 1570 | Loss: 0.3629 | LM: 0.3575 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.349 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:37] Epoch 1 | Step 1580 | Loss: 0.3630 | LM: 0.3579 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:45] Epoch 1 | Step 1590 | Loss: 0.3632 | LM: 0.3588 | LB: 1.1014 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.404 | LR: 5.00e-04 +[2026-04-26 00:39:52] Epoch 1 | Step 1600 | Loss: 0.3633 | LM: 0.3588 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:39:59] Epoch 1 | Step 1610 | Loss: 0.3632 | LM: 0.3585 | LB: 1.1012 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:06] Epoch 1 | Step 1620 | Loss: 0.3631 | LM: 0.3584 | LB: 1.1011 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:13] Epoch 1 | Step 1630 | Loss: 0.3634 | LM: 0.3585 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:20] Epoch 1 | Step 1640 | Loss: 0.3634 | LM: 0.3587 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:27] Epoch 1 | Step 1650 | Loss: 0.3633 | LM: 0.3589 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:34] Epoch 1 | Step 1660 | Loss: 0.3633 | LM: 0.3583 | LB: 1.1008 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:42] Epoch 1 | Step 1670 | Loss: 0.3636 | LM: 0.3586 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:49] Epoch 1 | Step 1680 | Loss: 0.3638 | LM: 0.3585 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:40:56] Epoch 1 | Step 1690 | Loss: 0.3638 | LM: 0.3582 | LB: 1.1010 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:41:03] Epoch 1 | Step 1700 | Loss: 0.3635 | LM: 0.3578 | LB: 1.1009 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:41:10] Epoch 1 | Step 1710 | Loss: 0.3637 | LM: 0.3573 | LB: 1.1007 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.415/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:41:17] Epoch 1 | Step 1720 | Loss: 0.3637 | LM: 0.3574 | LB: 1.1005 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:41:24] Epoch 1 | Step 1730 | Loss: 0.3637 | LM: 0.3576 | LB: 1.1004 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.403 | LR: 5.00e-04 +[2026-04-26 00:41:31] Epoch 1 | Step 1740 | Loss: 0.3639 | LM: 0.3579 | LB: 1.1002 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.402 | LR: 5.00e-04 +[2026-04-26 00:41:38] Epoch 1 | Step 1750 | Loss: 0.3642 | LM: 0.3583 | LB: 1.1001 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.414/SR1: 0.402 | LR: 5.00e-04 +[2026-04-26 00:41:46] Epoch 1 | Step 1760 | Loss: 0.3643 | LM: 0.3584 | LB: 1.0998 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.413/SR1: 0.402 | LR: 5.00e-04 +[2026-04-26 00:41:53] Epoch 1 | Step 1770 | Loss: 0.3645 | LM: 0.3586 | LB: 1.0996 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.413/SR1: 0.402 | LR: 5.00e-04 +[2026-04-26 00:42:00] Epoch 1 | Step 1780 | Loss: 0.3646 | LM: 0.3585 | LB: 1.0995 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.413/SR1: 0.402 | LR: 5.00e-04 +[2026-04-26 00:42:07] Epoch 1 | Step 1790 | Loss: 0.3649 | LM: 0.3586 | LB: 1.0993 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.413/SR1: 0.401 | LR: 5.00e-04 +[2026-04-26 00:42:14] Epoch 1 | Step 1800 | Loss: 0.3650 | LM: 0.3587 | LB: 1.0991 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.348 | HR1: 0.413/SR1: 0.401 | LR: 5.00e-04 +[2026-04-26 00:42:21] Epoch 1 | Step 1810 | Loss: 0.3651 | LM: 0.3589 | LB: 1.0989 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.347 | HR1: 0.412/SR1: 0.401 | LR: 5.00e-04 +[2026-04-26 00:42:28] Epoch 1 | Step 1820 | Loss: 0.3653 | LM: 0.3589 | LB: 1.0988 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.347 | HR1: 0.412/SR1: 0.401 | LR: 5.00e-04 +[2026-04-26 00:42:35] Epoch 1 | Step 1830 | Loss: 0.3655 | LM: 0.3599 | LB: 1.0986 | CL0: 2.8 | CL1: 2.4 | HR0: 0.356/SR0: 0.347 | HR1: 0.412/SR1: 0.401 | LR: 5.00e-04 +[2026-04-26 00:42:42] Epoch 1 | Step 1840 | Loss: 0.3657 | LM: 0.3602 | LB: 1.0984 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.412/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:42:49] Epoch 1 | Step 1850 | Loss: 0.3656 | LM: 0.3600 | LB: 1.0983 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.412/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:42:56] Epoch 1 | Step 1860 | Loss: 0.3659 | LM: 0.3600 | LB: 1.0982 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:43:04] Epoch 1 | Step 1870 | Loss: 0.3660 | LM: 0.3598 | LB: 1.0982 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:43:11] Epoch 1 | Step 1880 | Loss: 0.3660 | LM: 0.3599 | LB: 1.0980 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:43:18] Epoch 1 | Step 1890 | Loss: 0.3662 | LM: 0.3599 | LB: 1.0978 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.400 | LR: 5.00e-04 +[2026-04-26 00:43:25] Epoch 1 | Step 1900 | Loss: 0.3662 | LM: 0.3600 | LB: 1.0976 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:43:32] Epoch 1 | Step 1910 | Loss: 0.3664 | LM: 0.3603 | LB: 1.0975 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:43:39] Epoch 1 | Step 1920 | Loss: 0.3666 | LM: 0.3601 | LB: 1.0973 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.411/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:43:46] Epoch 1 | Step 1930 | Loss: 0.3666 | LM: 0.3601 | LB: 1.0972 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:43:53] Epoch 1 | Step 1940 | Loss: 0.3665 | LM: 0.3603 | LB: 1.0971 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:44:01] Epoch 1 | Step 1950 | Loss: 0.3665 | LM: 0.3604 | LB: 1.0969 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:44:08] Epoch 1 | Step 1960 | Loss: 0.3667 | LM: 0.3608 | LB: 1.0968 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.399 | LR: 5.00e-04 +[2026-04-26 00:44:15] Epoch 1 | Step 1970 | Loss: 0.3669 | LM: 0.3612 | LB: 1.0966 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:44:22] Epoch 1 | Step 1980 | Loss: 0.3671 | LM: 0.3613 | LB: 1.0965 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.410/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:44:29] Epoch 1 | Step 1990 | Loss: 0.3672 | LM: 0.3616 | LB: 1.0963 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:44:36] Epoch 1 | Step 2000 | Loss: 0.3673 | LM: 0.3616 | LB: 1.0962 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:44:37] Validation | Batch 10/42 | Loss: 0.3763 | LM_LOSS: 0.3657 | LB_LOSS: 1.0613 +[2026-04-26 00:44:38] Validation | Batch 20/42 | Loss: 0.3979 | LM_LOSS: 0.3872 | LB_LOSS: 1.0651 +[2026-04-26 00:44:39] Validation | Batch 30/42 | Loss: 0.3903 | LM_LOSS: 0.3797 | LB_LOSS: 1.0634 +[2026-04-26 00:44:40] Validation | Batch 40/42 | Loss: 0.3950 | LM_LOSS: 0.3843 | LB_LOSS: 1.0631 +[2026-04-26 00:44:41] Validation | Batch 42/42 | Loss: 0.3958 | LM_LOSS: 0.3852 | LB_LOSS: 1.0633 +[2026-04-26 00:44:41] Validation | Loss: 0.3958 | LM_LOSS: 0.3852 | LB_LOSS: 1.0633 | PPL: 1.46 | Time: 4.51s +[2026-04-26 00:44:43] New best model saved! Val loss: 0.3958 +[2026-04-26 00:44:50] Epoch 1 | Step 2010 | Loss: 0.3675 | LM: 0.3620 | LB: 1.0961 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:44:57] Epoch 1 | Step 2020 | Loss: 0.3676 | LM: 0.3621 | LB: 1.0960 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:45:05] Epoch 1 | Step 2030 | Loss: 0.3677 | LM: 0.3621 | LB: 1.0959 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.347 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:45:12] Epoch 1 | Step 2040 | Loss: 0.3678 | LM: 0.3620 | LB: 1.0958 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.409/SR1: 0.398 | LR: 5.00e-04 +[2026-04-26 00:45:19] Epoch 1 | Step 2050 | Loss: 0.3678 | LM: 0.3619 | LB: 1.0956 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.409/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:45:26] Epoch 1 | Step 2060 | Loss: 0.3679 | LM: 0.3623 | LB: 1.0955 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.409/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:45:33] Epoch 1 | Step 2070 | Loss: 0.3677 | LM: 0.3618 | LB: 1.0954 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:45:40] Epoch 1 | Step 2080 | Loss: 0.3676 | LM: 0.3618 | LB: 1.0953 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:45:47] Epoch 1 | Step 2090 | Loss: 0.3677 | LM: 0.3619 | LB: 1.0952 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:45:54] Epoch 1 | Step 2100 | Loss: 0.3678 | LM: 0.3617 | LB: 1.0950 | CL0: 2.8 | CL1: 2.5 | HR0: 0.356/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:01] Epoch 1 | Step 2110 | Loss: 0.3680 | LM: 0.3617 | LB: 1.0949 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:08] Epoch 1 | Step 2120 | Loss: 0.3679 | LM: 0.3621 | LB: 1.0949 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:16] Epoch 1 | Step 2130 | Loss: 0.3680 | LM: 0.3626 | LB: 1.0949 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:23] Epoch 1 | Step 2140 | Loss: 0.3679 | LM: 0.3632 | LB: 1.0948 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:30] Epoch 1 | Step 2150 | Loss: 0.3680 | LM: 0.3631 | LB: 1.0947 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.397 | LR: 5.00e-04 +[2026-04-26 00:46:37] Epoch 1 | Step 2160 | Loss: 0.3681 | LM: 0.3632 | LB: 1.0946 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:46:44] Epoch 1 | Step 2170 | Loss: 0.3681 | LM: 0.3630 | LB: 1.0945 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:46:51] Epoch 1 | Step 2180 | Loss: 0.3681 | LM: 0.3631 | LB: 1.0944 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:46:58] Epoch 1 | Step 2190 | Loss: 0.3681 | LM: 0.3629 | LB: 1.0944 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.408/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:05] Epoch 1 | Step 2200 | Loss: 0.3682 | LM: 0.3631 | LB: 1.0943 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:12] Epoch 1 | Step 2210 | Loss: 0.3684 | LM: 0.3635 | LB: 1.0942 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:19] Epoch 1 | Step 2220 | Loss: 0.3687 | LM: 0.3638 | LB: 1.0941 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:27] Epoch 1 | Step 2230 | Loss: 0.3689 | LM: 0.3638 | LB: 1.0940 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:34] Epoch 1 | Step 2240 | Loss: 0.3691 | LM: 0.3636 | LB: 1.0940 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.346 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:41] Epoch 1 | Step 2250 | Loss: 0.3693 | LM: 0.3638 | LB: 1.0939 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:48] Epoch 1 | Step 2260 | Loss: 0.3693 | LM: 0.3635 | LB: 1.0938 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:47:55] Epoch 1 | Step 2270 | Loss: 0.3694 | LM: 0.3640 | LB: 1.0938 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:48:02] Epoch 1 | Step 2280 | Loss: 0.3695 | LM: 0.3641 | LB: 1.0937 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.396 | LR: 5.00e-04 +[2026-04-26 00:48:09] Epoch 1 | Step 2290 | Loss: 0.3699 | LM: 0.3644 | LB: 1.0935 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:16] Epoch 1 | Step 2300 | Loss: 0.3699 | LM: 0.3641 | LB: 1.0934 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:23] Epoch 1 | Step 2310 | Loss: 0.3699 | LM: 0.3642 | LB: 1.0933 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:30] Epoch 1 | Step 2320 | Loss: 0.3701 | LM: 0.3643 | LB: 1.0933 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.407/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:37] Epoch 1 | Step 2330 | Loss: 0.3701 | LM: 0.3645 | LB: 1.0932 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:45] Epoch 1 | Step 2340 | Loss: 0.3702 | LM: 0.3649 | LB: 1.0931 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:52] Epoch 1 | Step 2350 | Loss: 0.3702 | LM: 0.3651 | LB: 1.0930 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:48:59] Epoch 1 | Step 2360 | Loss: 0.3703 | LM: 0.3652 | LB: 1.0929 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:49:06] Epoch 1 | Step 2370 | Loss: 0.3704 | LM: 0.3652 | LB: 1.0928 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:49:13] Epoch 1 | Step 2380 | Loss: 0.3703 | LM: 0.3651 | LB: 1.0927 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:49:20] Epoch 1 | Step 2390 | Loss: 0.3705 | LM: 0.3652 | LB: 1.0926 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.395 | LR: 5.00e-04 +[2026-04-26 00:49:27] Epoch 1 | Step 2400 | Loss: 0.3706 | LM: 0.3656 | LB: 1.0926 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:49:34] Epoch 1 | Step 2410 | Loss: 0.3708 | LM: 0.3662 | LB: 1.0925 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.406/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:49:41] Epoch 1 | Step 2420 | Loss: 0.3710 | LM: 0.3660 | LB: 1.0924 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:49:49] Epoch 1 | Step 2430 | Loss: 0.3710 | LM: 0.3661 | LB: 1.0923 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:49:56] Epoch 1 | Step 2440 | Loss: 0.3710 | LM: 0.3661 | LB: 1.0922 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:03] Epoch 1 | Step 2450 | Loss: 0.3710 | LM: 0.3661 | LB: 1.0921 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:10] Epoch 1 | Step 2460 | Loss: 0.3710 | LM: 0.3661 | LB: 1.0920 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:17] Epoch 1 | Step 2470 | Loss: 0.3711 | LM: 0.3660 | LB: 1.0919 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:24] Epoch 1 | Step 2480 | Loss: 0.3712 | LM: 0.3659 | LB: 1.0918 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:31] Epoch 1 | Step 2490 | Loss: 0.3711 | LM: 0.3662 | LB: 1.0917 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.394 | LR: 5.00e-04 +[2026-04-26 00:50:39] Epoch 1 | Step 2500 | Loss: 0.3711 | LM: 0.3663 | LB: 1.0916 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:50:46] Epoch 1 | Step 2510 | Loss: 0.3712 | LM: 0.3666 | LB: 1.0916 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.405/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:50:53] Epoch 1 | Step 2520 | Loss: 0.3709 | LM: 0.3662 | LB: 1.0915 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:00] Epoch 1 | Step 2530 | Loss: 0.3709 | LM: 0.3663 | LB: 1.0915 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:07] Epoch 1 | Step 2540 | Loss: 0.3709 | LM: 0.3662 | LB: 1.0914 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:14] Epoch 1 | Step 2550 | Loss: 0.3707 | LM: 0.3656 | LB: 1.0913 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:21] Epoch 1 | Step 2560 | Loss: 0.3707 | LM: 0.3658 | LB: 1.0913 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:28] Epoch 1 | Step 2570 | Loss: 0.3708 | LM: 0.3661 | LB: 1.0912 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:35] Epoch 1 | Step 2580 | Loss: 0.3710 | LM: 0.3662 | LB: 1.0911 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:43] Epoch 1 | Step 2590 | Loss: 0.3711 | LM: 0.3660 | LB: 1.0910 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.393 | LR: 5.00e-04 +[2026-04-26 00:51:50] Epoch 1 | Step 2600 | Loss: 0.3712 | LM: 0.3663 | LB: 1.0909 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:51:57] Epoch 1 | Step 2610 | Loss: 0.3713 | LM: 0.3659 | LB: 1.0909 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.404/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:04] Epoch 1 | Step 2620 | Loss: 0.3713 | LM: 0.3659 | LB: 1.0908 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:11] Epoch 1 | Step 2630 | Loss: 0.3713 | LM: 0.3660 | LB: 1.0907 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:18] Epoch 1 | Step 2640 | Loss: 0.3715 | LM: 0.3658 | LB: 1.0906 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:25] Epoch 1 | Step 2650 | Loss: 0.3713 | LM: 0.3657 | LB: 1.0906 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:32] Epoch 1 | Step 2660 | Loss: 0.3714 | LM: 0.3660 | LB: 1.0905 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:39] Epoch 1 | Step 2670 | Loss: 0.3714 | LM: 0.3663 | LB: 1.0904 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:46] Epoch 1 | Step 2680 | Loss: 0.3714 | LM: 0.3663 | LB: 1.0903 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:52:53] Epoch 1 | Step 2690 | Loss: 0.3714 | LM: 0.3666 | LB: 1.0902 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:53:01] Epoch 1 | Step 2700 | Loss: 0.3714 | LM: 0.3667 | LB: 1.0901 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.403/SR1: 0.392 | LR: 5.00e-04 +[2026-04-26 00:53:08] Epoch 1 | Step 2710 | Loss: 0.3713 | LM: 0.3665 | LB: 1.0900 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:15] Epoch 1 | Step 2720 | Loss: 0.3714 | LM: 0.3666 | LB: 1.0899 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:22] Epoch 1 | Step 2730 | Loss: 0.3714 | LM: 0.3666 | LB: 1.0898 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:29] Epoch 1 | Step 2740 | Loss: 0.3715 | LM: 0.3666 | LB: 1.0897 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:36] Epoch 1 | Step 2750 | Loss: 0.3716 | LM: 0.3666 | LB: 1.0895 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:43] Epoch 1 | Step 2760 | Loss: 0.3715 | LM: 0.3662 | LB: 1.0894 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.345 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:51] Epoch 1 | Step 2770 | Loss: 0.3715 | LM: 0.3663 | LB: 1.0893 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.402/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:53:58] Epoch 1 | Step 2780 | Loss: 0.3716 | LM: 0.3660 | LB: 1.0892 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.391 | LR: 5.00e-04 +[2026-04-26 00:54:05] Epoch 1 | Step 2790 | Loss: 0.3716 | LM: 0.3662 | LB: 1.0892 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:12] Epoch 1 | Step 2800 | Loss: 0.3716 | LM: 0.3663 | LB: 1.0891 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:19] Epoch 1 | Step 2810 | Loss: 0.3717 | LM: 0.3663 | LB: 1.0890 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:26] Epoch 1 | Step 2820 | Loss: 0.3718 | LM: 0.3666 | LB: 1.0889 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:33] Epoch 1 | Step 2830 | Loss: 0.3718 | LM: 0.3666 | LB: 1.0888 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:41] Epoch 1 | Step 2840 | Loss: 0.3722 | LM: 0.3670 | LB: 1.0887 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:48] Epoch 1 | Step 2850 | Loss: 0.3723 | LM: 0.3675 | LB: 1.0887 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:54:55] Epoch 1 | Step 2860 | Loss: 0.3724 | LM: 0.3675 | LB: 1.0886 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:55:02] Epoch 1 | Step 2870 | Loss: 0.3724 | LM: 0.3676 | LB: 1.0886 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.401/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:55:09] Epoch 1 | Step 2880 | Loss: 0.3724 | LM: 0.3675 | LB: 1.0885 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:55:16] Epoch 1 | Step 2890 | Loss: 0.3724 | LM: 0.3673 | LB: 1.0884 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.390 | LR: 5.00e-04 +[2026-04-26 00:55:23] Epoch 1 | Step 2900 | Loss: 0.3723 | LM: 0.3671 | LB: 1.0883 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:55:31] Epoch 1 | Step 2910 | Loss: 0.3724 | LM: 0.3672 | LB: 1.0882 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:55:38] Epoch 1 | Step 2920 | Loss: 0.3725 | LM: 0.3675 | LB: 1.0882 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:55:45] Epoch 1 | Step 2930 | Loss: 0.3724 | LM: 0.3673 | LB: 1.0881 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:55:52] Epoch 1 | Step 2940 | Loss: 0.3723 | LM: 0.3673 | LB: 1.0880 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:55:59] Epoch 1 | Step 2950 | Loss: 0.3725 | LM: 0.3675 | LB: 1.0879 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:06] Epoch 1 | Step 2960 | Loss: 0.3726 | LM: 0.3677 | LB: 1.0879 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:13] Epoch 1 | Step 2970 | Loss: 0.3727 | LM: 0.3678 | LB: 1.0878 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.400/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:21] Epoch 1 | Step 2980 | Loss: 0.3726 | LM: 0.3678 | LB: 1.0877 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:28] Epoch 1 | Step 2990 | Loss: 0.3728 | LM: 0.3679 | LB: 1.0876 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:35] Epoch 1 | Step 3000 | Loss: 0.3728 | LM: 0.3679 | LB: 1.0875 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:42] Epoch 1 | Step 3010 | Loss: 0.3728 | LM: 0.3679 | LB: 1.0875 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:49] Epoch 1 | Step 3020 | Loss: 0.3727 | LM: 0.3678 | LB: 1.0874 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:56:57] Epoch 1 | Step 3030 | Loss: 0.3727 | LM: 0.3679 | LB: 1.0874 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:04] Epoch 1 | Step 3040 | Loss: 0.3726 | LM: 0.3677 | LB: 1.0874 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:11] Epoch 1 | Step 3050 | Loss: 0.3726 | LM: 0.3674 | LB: 1.0873 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:18] Epoch 1 | Step 3060 | Loss: 0.3727 | LM: 0.3680 | LB: 1.0873 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:25] Epoch 1 | Step 3070 | Loss: 0.3726 | LM: 0.3680 | LB: 1.0873 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:32] Epoch 1 | Step 3080 | Loss: 0.3726 | LM: 0.3678 | LB: 1.0872 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.389 | LR: 5.00e-04 +[2026-04-26 00:57:39] Epoch 1 | Step 3090 | Loss: 0.3725 | LM: 0.3675 | LB: 1.0871 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:57:46] Epoch 1 | Step 3100 | Loss: 0.3725 | LM: 0.3675 | LB: 1.0871 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:57:54] Epoch 1 | Step 3110 | Loss: 0.3724 | LM: 0.3674 | LB: 1.0871 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:01] Epoch 1 | Step 3120 | Loss: 0.3727 | LM: 0.3676 | LB: 1.0871 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:08] Epoch 1 | Step 3130 | Loss: 0.3727 | LM: 0.3677 | LB: 1.0870 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:15] Epoch 1 | Step 3140 | Loss: 0.3728 | LM: 0.3676 | LB: 1.0870 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:22] Epoch 1 | Step 3150 | Loss: 0.3730 | LM: 0.3679 | LB: 1.0869 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:29] Epoch 1 | Step 3160 | Loss: 0.3730 | LM: 0.3677 | LB: 1.0868 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.399/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:36] Epoch 1 | Step 3170 | Loss: 0.3731 | LM: 0.3679 | LB: 1.0868 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:43] Epoch 1 | Step 3180 | Loss: 0.3731 | LM: 0.3683 | LB: 1.0867 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:51] Epoch 1 | Step 3190 | Loss: 0.3730 | LM: 0.3680 | LB: 1.0866 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:58:58] Epoch 1 | Step 3200 | Loss: 0.3730 | LM: 0.3678 | LB: 1.0865 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:59:05] Epoch 1 | Step 3210 | Loss: 0.3730 | LM: 0.3675 | LB: 1.0864 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:59:12] Epoch 1 | Step 3220 | Loss: 0.3729 | LM: 0.3674 | LB: 1.0863 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.344 | HR1: 0.398/SR1: 0.388 | LR: 5.00e-04 +[2026-04-26 00:59:19] Epoch 1 | Step 3230 | Loss: 0.3730 | LM: 0.3674 | LB: 1.0862 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 00:59:26] Epoch 1 | Step 3240 | Loss: 0.3729 | LM: 0.3673 | LB: 1.0862 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 00:59:33] Epoch 1 | Step 3250 | Loss: 0.3730 | LM: 0.3673 | LB: 1.0861 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 00:59:40] Epoch 1 | Step 3260 | Loss: 0.3731 | LM: 0.3672 | LB: 1.0860 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 00:59:48] Epoch 1 | Step 3270 | Loss: 0.3731 | LM: 0.3674 | LB: 1.0860 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 00:59:55] Epoch 1 | Step 3280 | Loss: 0.3730 | LM: 0.3673 | LB: 1.0859 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.398/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:02] Epoch 1 | Step 3290 | Loss: 0.3730 | LM: 0.3673 | LB: 1.0859 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:09] Epoch 1 | Step 3300 | Loss: 0.3731 | LM: 0.3677 | LB: 1.0859 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:16] Epoch 1 | Step 3310 | Loss: 0.3731 | LM: 0.3676 | LB: 1.0858 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:23] Epoch 1 | Step 3320 | Loss: 0.3732 | LM: 0.3676 | LB: 1.0857 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:31] Epoch 1 | Step 3330 | Loss: 0.3732 | LM: 0.3677 | LB: 1.0856 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:38] Epoch 1 | Step 3340 | Loss: 0.3733 | LM: 0.3679 | LB: 1.0856 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:45] Epoch 1 | Step 3350 | Loss: 0.3732 | LM: 0.3679 | LB: 1.0856 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.387 | LR: 5.00e-04 +[2026-04-26 01:00:52] Epoch 1 | Step 3360 | Loss: 0.3732 | LM: 0.3678 | LB: 1.0854 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:00:59] Epoch 1 | Step 3370 | Loss: 0.3733 | LM: 0.3677 | LB: 1.0854 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:06] Epoch 1 | Step 3380 | Loss: 0.3732 | LM: 0.3677 | LB: 1.0854 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:14] Epoch 1 | Step 3390 | Loss: 0.3733 | LM: 0.3678 | LB: 1.0853 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:21] Epoch 1 | Step 3400 | Loss: 0.3735 | LM: 0.3679 | LB: 1.0852 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.397/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:28] Epoch 1 | Step 3410 | Loss: 0.3735 | LM: 0.3680 | LB: 1.0852 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:35] Epoch 1 | Step 3420 | Loss: 0.3735 | LM: 0.3682 | LB: 1.0851 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:42] Epoch 1 | Step 3430 | Loss: 0.3735 | LM: 0.3684 | LB: 1.0850 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:49] Epoch 1 | Step 3440 | Loss: 0.3736 | LM: 0.3686 | LB: 1.0850 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:01:56] Epoch 1 | Step 3450 | Loss: 0.3736 | LM: 0.3687 | LB: 1.0849 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:03] Epoch 1 | Step 3460 | Loss: 0.3736 | LM: 0.3685 | LB: 1.0849 | CL0: 2.8 | CL1: 2.5 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:11] Epoch 1 | Step 3470 | Loss: 0.3736 | LM: 0.3684 | LB: 1.0848 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:18] Epoch 1 | Step 3480 | Loss: 0.3736 | LM: 0.3685 | LB: 1.0847 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:25] Epoch 1 | Step 3490 | Loss: 0.3736 | LM: 0.3685 | LB: 1.0846 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:32] Epoch 1 | Step 3500 | Loss: 0.3735 | LM: 0.3684 | LB: 1.0845 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.386 | LR: 5.00e-04 +[2026-04-26 01:02:39] Epoch 1 | Step 3510 | Loss: 0.3736 | LM: 0.3685 | LB: 1.0845 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:02:46] Epoch 1 | Step 3520 | Loss: 0.3736 | LM: 0.3687 | LB: 1.0844 | CL0: 2.8 | CL1: 2.6 | HR0: 0.355/SR0: 0.343 | HR1: 0.396/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:02:53] Epoch 1 | Step 3530 | Loss: 0.3737 | LM: 0.3686 | LB: 1.0843 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.396/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:01] Epoch 1 | Step 3540 | Loss: 0.3737 | LM: 0.3683 | LB: 1.0843 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.396/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:08] Epoch 1 | Step 3550 | Loss: 0.3737 | LM: 0.3684 | LB: 1.0842 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:15] Epoch 1 | Step 3560 | Loss: 0.3738 | LM: 0.3686 | LB: 1.0841 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:22] Epoch 1 | Step 3570 | Loss: 0.3738 | LM: 0.3686 | LB: 1.0840 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:29] Epoch 1 | Step 3580 | Loss: 0.3738 | LM: 0.3687 | LB: 1.0839 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:36] Epoch 1 | Step 3590 | Loss: 0.3738 | LM: 0.3687 | LB: 1.0839 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:43] Epoch 1 | Step 3600 | Loss: 0.3738 | LM: 0.3687 | LB: 1.0838 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:50] Epoch 1 | Step 3610 | Loss: 0.3737 | LM: 0.3686 | LB: 1.0837 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.343 | HR1: 0.395/SR1: 0.385 | LR: 5.00e-04 +[2026-04-26 01:03:57] Epoch 1 | Step 3620 | Loss: 0.3737 | LM: 0.3683 | LB: 1.0836 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.395/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:05] Epoch 1 | Step 3630 | Loss: 0.3738 | LM: 0.3685 | LB: 1.0835 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.395/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:12] Epoch 1 | Step 3640 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0835 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.395/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:19] Epoch 1 | Step 3650 | Loss: 0.3740 | LM: 0.3690 | LB: 1.0834 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:26] Epoch 1 | Step 3660 | Loss: 0.3739 | LM: 0.3690 | LB: 1.0834 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:33] Epoch 1 | Step 3670 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0833 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:40] Epoch 1 | Step 3680 | Loss: 0.3739 | LM: 0.3688 | LB: 1.0832 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:47] Epoch 1 | Step 3690 | Loss: 0.3740 | LM: 0.3690 | LB: 1.0832 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:04:54] Epoch 1 | Step 3700 | Loss: 0.3739 | LM: 0.3689 | LB: 1.0831 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:02] Epoch 1 | Step 3710 | Loss: 0.3739 | LM: 0.3689 | LB: 1.0831 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:09] Epoch 1 | Step 3720 | Loss: 0.3739 | LM: 0.3691 | LB: 1.0830 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:16] Epoch 1 | Step 3730 | Loss: 0.3740 | LM: 0.3690 | LB: 1.0829 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:23] Epoch 1 | Step 3740 | Loss: 0.3740 | LM: 0.3689 | LB: 1.0829 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:31] Epoch 1 | Step 3750 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0828 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.384 | LR: 5.00e-04 +[2026-04-26 01:05:38] Epoch 1 | Step 3760 | Loss: 0.3740 | LM: 0.3688 | LB: 1.0827 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:05:45] Epoch 1 | Step 3770 | Loss: 0.3740 | LM: 0.3689 | LB: 1.0827 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.394/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:05:52] Epoch 1 | Step 3780 | Loss: 0.3741 | LM: 0.3689 | LB: 1.0826 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:05:59] Epoch 1 | Step 3790 | Loss: 0.3742 | LM: 0.3690 | LB: 1.0826 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:07] Epoch 1 | Step 3800 | Loss: 0.3743 | LM: 0.3691 | LB: 1.0825 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:14] Epoch 1 | Step 3810 | Loss: 0.3741 | LM: 0.3688 | LB: 1.0825 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:21] Epoch 1 | Step 3820 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0824 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:28] Epoch 1 | Step 3830 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0823 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:35] Epoch 1 | Step 3840 | Loss: 0.3741 | LM: 0.3691 | LB: 1.0822 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:42] Epoch 1 | Step 3850 | Loss: 0.3741 | LM: 0.3690 | LB: 1.0822 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:49] Epoch 1 | Step 3860 | Loss: 0.3741 | LM: 0.3690 | LB: 1.0821 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:06:56] Epoch 1 | Step 3870 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0820 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:03] Epoch 1 | Step 3880 | Loss: 0.3740 | LM: 0.3685 | LB: 1.0820 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:11] Epoch 1 | Step 3890 | Loss: 0.3740 | LM: 0.3687 | LB: 1.0819 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:18] Epoch 1 | Step 3900 | Loss: 0.3740 | LM: 0.3687 | LB: 1.0819 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:25] Epoch 1 | Step 3910 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0819 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:32] Epoch 1 | Step 3920 | Loss: 0.3742 | LM: 0.3687 | LB: 1.0819 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:39] Epoch 1 | Step 3930 | Loss: 0.3742 | LM: 0.3688 | LB: 1.0818 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.393/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:46] Epoch 1 | Step 3940 | Loss: 0.3742 | LM: 0.3687 | LB: 1.0818 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:07:53] Epoch 1 | Step 3950 | Loss: 0.3741 | LM: 0.3684 | LB: 1.0818 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:08:00] Epoch 1 | Step 3960 | Loss: 0.3741 | LM: 0.3685 | LB: 1.0817 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:08:07] Epoch 1 | Step 3970 | Loss: 0.3741 | LM: 0.3684 | LB: 1.0817 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.383 | LR: 5.00e-04 +[2026-04-26 01:08:15] Epoch 1 | Step 3980 | Loss: 0.3741 | LM: 0.3684 | LB: 1.0817 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.383 | LR: 4.99e-04 +[2026-04-26 01:08:22] Epoch 1 | Step 3990 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0816 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.382 | LR: 4.99e-04 +[2026-04-26 01:08:29] Epoch 1 | Step 4000 | Loss: 0.3741 | LM: 0.3686 | LB: 1.0816 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.382 | LR: 4.98e-04 +[2026-04-26 01:08:30] Validation | Batch 10/42 | Loss: 0.3711 | LM_LOSS: 0.3604 | LB_LOSS: 1.0699 +[2026-04-26 01:08:31] Validation | Batch 20/42 | Loss: 0.3913 | LM_LOSS: 0.3806 | LB_LOSS: 1.0735 +[2026-04-26 01:08:32] Validation | Batch 30/42 | Loss: 0.3810 | LM_LOSS: 0.3703 | LB_LOSS: 1.0721 +[2026-04-26 01:08:33] Validation | Batch 40/42 | Loss: 0.3872 | LM_LOSS: 0.3765 | LB_LOSS: 1.0715 +[2026-04-26 01:08:33] Validation | Batch 42/42 | Loss: 0.3869 | LM_LOSS: 0.3762 | LB_LOSS: 1.0719 +[2026-04-26 01:08:33] Validation | Loss: 0.3869 | LM_LOSS: 0.3762 | LB_LOSS: 1.0719 | PPL: 1.45 | Time: 4.60s +[2026-04-26 01:08:36] New best model saved! Val loss: 0.3869 +[2026-04-26 01:08:43] Epoch 1 | Step 4010 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0816 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.382 | LR: 4.97e-04 +[2026-04-26 01:08:51] Epoch 1 | Step 4020 | Loss: 0.3741 | LM: 0.3688 | LB: 1.0815 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.382 | LR: 4.95e-04 +[2026-04-26 01:08:58] Epoch 1 | Step 4030 | Loss: 0.3740 | LM: 0.3686 | LB: 1.0815 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.342 | HR1: 0.392/SR1: 0.382 | LR: 4.94e-04 +[2026-04-26 01:09:05] Epoch 1 | Step 4040 | Loss: 0.3739 | LM: 0.3686 | LB: 1.0815 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.92e-04 +[2026-04-26 01:09:12] Epoch 1 | Step 4050 | Loss: 0.3739 | LM: 0.3686 | LB: 1.0815 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.90e-04 +[2026-04-26 01:09:19] Epoch 1 | Step 4060 | Loss: 0.3737 | LM: 0.3684 | LB: 1.0814 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.88e-04 +[2026-04-26 01:09:26] Epoch 1 | Step 4070 | Loss: 0.3738 | LM: 0.3685 | LB: 1.0814 | CL0: 2.8 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.85e-04 +[2026-04-26 01:09:33] Epoch 1 | Step 4080 | Loss: 0.3738 | LM: 0.3686 | LB: 1.0813 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.82e-04 +[2026-04-26 01:09:40] Epoch 1 | Step 4090 | Loss: 0.3739 | LM: 0.3686 | LB: 1.0813 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.80e-04 +[2026-04-26 01:09:47] Epoch 1 | Step 4100 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0812 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.77e-04 +[2026-04-26 01:09:54] Epoch 1 | Step 4110 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0812 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.73e-04 +[2026-04-26 01:10:02] Epoch 1 | Step 4120 | Loss: 0.3740 | LM: 0.3689 | LB: 1.0811 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.70e-04 +[2026-04-26 01:10:09] Epoch 1 | Step 4130 | Loss: 0.3740 | LM: 0.3686 | LB: 1.0811 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.66e-04 +[2026-04-26 01:10:16] Epoch 1 | Step 4140 | Loss: 0.3741 | LM: 0.3687 | LB: 1.0810 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.62e-04 +[2026-04-26 01:10:23] Epoch 1 | Step 4150 | Loss: 0.3742 | LM: 0.3689 | LB: 1.0810 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.58e-04 +[2026-04-26 01:10:30] Epoch 1 | Step 4160 | Loss: 0.3744 | LM: 0.3692 | LB: 1.0810 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.54e-04 +[2026-04-26 01:10:37] Epoch 1 | Step 4170 | Loss: 0.3744 | LM: 0.3693 | LB: 1.0809 | CL0: 2.9 | CL1: 2.6 | HR0: 0.354/SR0: 0.341 | HR1: 0.392/SR1: 0.382 | LR: 4.49e-04 +[2026-04-26 01:10:44] Epoch 1 | Step 4180 | Loss: 0.3744 | LM: 0.3694 | LB: 1.0809 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.45e-04 +[2026-04-26 01:10:51] Epoch 1 | Step 4190 | Loss: 0.3743 | LM: 0.3694 | LB: 1.0808 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.40e-04 +[2026-04-26 01:10:58] Epoch 1 | Step 4200 | Loss: 0.3745 | LM: 0.3696 | LB: 1.0808 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.35e-04 +[2026-04-26 01:11:05] Epoch 1 | Step 4210 | Loss: 0.3745 | LM: 0.3696 | LB: 1.0807 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.30e-04 +[2026-04-26 01:11:13] Epoch 1 | Step 4220 | Loss: 0.3747 | LM: 0.3698 | LB: 1.0807 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.25e-04 +[2026-04-26 01:11:20] Epoch 1 | Step 4230 | Loss: 0.3747 | LM: 0.3699 | LB: 1.0806 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.19e-04 +[2026-04-26 01:11:27] Epoch 1 | Step 4240 | Loss: 0.3748 | LM: 0.3701 | LB: 1.0806 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.14e-04 +[2026-04-26 01:11:34] Epoch 1 | Step 4250 | Loss: 0.3749 | LM: 0.3701 | LB: 1.0806 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.08e-04 +[2026-04-26 01:11:41] Epoch 1 | Step 4260 | Loss: 0.3748 | LM: 0.3699 | LB: 1.0805 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.382 | LR: 4.02e-04 +[2026-04-26 01:11:48] Epoch 1 | Step 4270 | Loss: 0.3749 | LM: 0.3698 | LB: 1.0805 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.96e-04 +[2026-04-26 01:11:55] Epoch 1 | Step 4280 | Loss: 0.3749 | LM: 0.3697 | LB: 1.0804 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.90e-04 +[2026-04-26 01:12:02] Epoch 1 | Step 4290 | Loss: 0.3748 | LM: 0.3696 | LB: 1.0804 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.84e-04 +[2026-04-26 01:12:09] Epoch 1 | Step 4300 | Loss: 0.3748 | LM: 0.3696 | LB: 1.0803 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.78e-04 +[2026-04-26 01:12:16] Epoch 1 | Step 4310 | Loss: 0.3749 | LM: 0.3695 | LB: 1.0803 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.71e-04 +[2026-04-26 01:12:23] Epoch 1 | Step 4320 | Loss: 0.3749 | LM: 0.3694 | LB: 1.0802 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.391/SR1: 0.381 | LR: 3.65e-04 +[2026-04-26 01:12:31] Epoch 1 | Step 4330 | Loss: 0.3749 | LM: 0.3695 | LB: 1.0801 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.58e-04 +[2026-04-26 01:12:38] Epoch 1 | Step 4340 | Loss: 0.3748 | LM: 0.3697 | LB: 1.0801 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.52e-04 +[2026-04-26 01:12:45] Epoch 1 | Step 4350 | Loss: 0.3747 | LM: 0.3697 | LB: 1.0800 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.45e-04 +[2026-04-26 01:12:52] Epoch 1 | Step 4360 | Loss: 0.3747 | LM: 0.3696 | LB: 1.0800 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.38e-04 +[2026-04-26 01:12:59] Epoch 1 | Step 4370 | Loss: 0.3747 | LM: 0.3698 | LB: 1.0799 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.31e-04 +[2026-04-26 01:13:06] Epoch 1 | Step 4380 | Loss: 0.3746 | LM: 0.3698 | LB: 1.0799 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.24e-04 +[2026-04-26 01:13:13] Epoch 1 | Step 4390 | Loss: 0.3747 | LM: 0.3698 | LB: 1.0798 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.17e-04 +[2026-04-26 01:13:20] Epoch 1 | Step 4400 | Loss: 0.3747 | LM: 0.3696 | LB: 1.0798 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.10e-04 +[2026-04-26 01:13:27] Epoch 1 | Step 4410 | Loss: 0.3746 | LM: 0.3695 | LB: 1.0797 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 3.03e-04 +[2026-04-26 01:13:34] Epoch 1 | Step 4420 | Loss: 0.3746 | LM: 0.3696 | LB: 1.0797 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.381 | LR: 2.96e-04 +[2026-04-26 01:13:41] Epoch 1 | Step 4430 | Loss: 0.3745 | LM: 0.3695 | LB: 1.0796 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.89e-04 +[2026-04-26 01:13:49] Epoch 1 | Step 4440 | Loss: 0.3746 | LM: 0.3695 | LB: 1.0796 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.82e-04 +[2026-04-26 01:13:56] Epoch 1 | Step 4450 | Loss: 0.3746 | LM: 0.3693 | LB: 1.0795 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.74e-04 +[2026-04-26 01:14:03] Epoch 1 | Step 4460 | Loss: 0.3747 | LM: 0.3693 | LB: 1.0795 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.67e-04 +[2026-04-26 01:14:10] Epoch 1 | Step 4470 | Loss: 0.3745 | LM: 0.3692 | LB: 1.0794 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.60e-04 +[2026-04-26 01:14:17] Epoch 1 | Step 4480 | Loss: 0.3745 | LM: 0.3692 | LB: 1.0794 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.390/SR1: 0.380 | LR: 2.53e-04 +[2026-04-26 01:14:24] Epoch 1 | Step 4490 | Loss: 0.3744 | LM: 0.3692 | LB: 1.0793 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.46e-04 +[2026-04-26 01:14:31] Epoch 1 | Step 4500 | Loss: 0.3743 | LM: 0.3691 | LB: 1.0792 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.39e-04 +[2026-04-26 01:14:39] Epoch 1 | Step 4510 | Loss: 0.3742 | LM: 0.3689 | LB: 1.0792 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.32e-04 +[2026-04-26 01:14:46] Epoch 1 | Step 4520 | Loss: 0.3741 | LM: 0.3690 | LB: 1.0792 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.25e-04 +[2026-04-26 01:14:53] Epoch 1 | Step 4530 | Loss: 0.3740 | LM: 0.3688 | LB: 1.0791 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.18e-04 +[2026-04-26 01:15:00] Epoch 1 | Step 4540 | Loss: 0.3740 | LM: 0.3689 | LB: 1.0791 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.11e-04 +[2026-04-26 01:15:08] Epoch 1 | Step 4550 | Loss: 0.3739 | LM: 0.3688 | LB: 1.0790 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 2.04e-04 +[2026-04-26 01:15:15] Epoch 1 | Step 4560 | Loss: 0.3739 | LM: 0.3687 | LB: 1.0790 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 1.97e-04 +[2026-04-26 01:15:22] Epoch 1 | Step 4570 | Loss: 0.3739 | LM: 0.3686 | LB: 1.0789 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 1.91e-04 +[2026-04-26 01:15:29] Epoch 1 | Step 4580 | Loss: 0.3738 | LM: 0.3686 | LB: 1.0789 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 1.84e-04 +[2026-04-26 01:15:36] Epoch 1 | Step 4590 | Loss: 0.3738 | LM: 0.3686 | LB: 1.0789 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 1.78e-04 +[2026-04-26 01:15:43] Epoch 1 | Step 4600 | Loss: 0.3737 | LM: 0.3684 | LB: 1.0788 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.380 | LR: 1.71e-04 +[2026-04-26 01:15:51] Epoch 1 | Step 4610 | Loss: 0.3736 | LM: 0.3683 | LB: 1.0788 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.389/SR1: 0.380 | LR: 1.65e-04 +[2026-04-26 01:15:58] Epoch 1 | Step 4620 | Loss: 0.3736 | LM: 0.3682 | LB: 1.0788 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.341 | HR1: 0.389/SR1: 0.379 | LR: 1.59e-04 +[2026-04-26 01:16:05] Epoch 1 | Step 4630 | Loss: 0.3736 | LM: 0.3683 | LB: 1.0787 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.389/SR1: 0.379 | LR: 1.53e-04 +[2026-04-26 01:16:12] Epoch 1 | Step 4640 | Loss: 0.3736 | LM: 0.3683 | LB: 1.0787 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.389/SR1: 0.379 | LR: 1.47e-04 +[2026-04-26 01:16:19] Epoch 1 | Step 4650 | Loss: 0.3736 | LM: 0.3682 | LB: 1.0786 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.389/SR1: 0.379 | LR: 1.41e-04 +[2026-04-26 01:16:27] Epoch 1 | Step 4660 | Loss: 0.3736 | LM: 0.3681 | LB: 1.0786 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.35e-04 +[2026-04-26 01:16:34] Epoch 1 | Step 4670 | Loss: 0.3735 | LM: 0.3681 | LB: 1.0785 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.30e-04 +[2026-04-26 01:16:41] Epoch 1 | Step 4680 | Loss: 0.3735 | LM: 0.3681 | LB: 1.0784 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.24e-04 +[2026-04-26 01:16:48] Epoch 1 | Step 4690 | Loss: 0.3734 | LM: 0.3679 | LB: 1.0784 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.19e-04 +[2026-04-26 01:16:55] Epoch 1 | Step 4700 | Loss: 0.3734 | LM: 0.3680 | LB: 1.0784 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.14e-04 +[2026-04-26 01:17:02] Epoch 1 | Step 4710 | Loss: 0.3734 | LM: 0.3679 | LB: 1.0783 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.09e-04 +[2026-04-26 01:17:10] Epoch 1 | Step 4720 | Loss: 0.3734 | LM: 0.3680 | LB: 1.0783 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 1.04e-04 +[2026-04-26 01:17:17] Epoch 1 | Step 4730 | Loss: 0.3733 | LM: 0.3679 | LB: 1.0782 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 9.98e-05 +[2026-04-26 01:17:24] Epoch 1 | Step 4740 | Loss: 0.3733 | LM: 0.3678 | LB: 1.0782 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 9.54e-05 +[2026-04-26 01:17:31] Epoch 1 | Step 4750 | Loss: 0.3733 | LM: 0.3677 | LB: 1.0781 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 9.12e-05 +[2026-04-26 01:17:38] Epoch 1 | Step 4760 | Loss: 0.3732 | LM: 0.3677 | LB: 1.0781 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 8.72e-05 +[2026-04-26 01:17:45] Epoch 1 | Step 4770 | Loss: 0.3731 | LM: 0.3674 | LB: 1.0781 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 8.33e-05 +[2026-04-26 01:17:53] Epoch 1 | Step 4780 | Loss: 0.3731 | LM: 0.3674 | LB: 1.0781 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 7.97e-05 +[2026-04-26 01:18:00] Epoch 1 | Step 4790 | Loss: 0.3731 | LM: 0.3673 | LB: 1.0780 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 7.62e-05 +[2026-04-26 01:18:07] Epoch 1 | Step 4800 | Loss: 0.3730 | LM: 0.3672 | LB: 1.0780 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 7.30e-05 +[2026-04-26 01:18:14] Epoch 1 | Step 4810 | Loss: 0.3729 | LM: 0.3671 | LB: 1.0779 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.379 | LR: 6.99e-05 +[2026-04-26 01:18:21] Epoch 1 | Step 4820 | Loss: 0.3728 | LM: 0.3669 | LB: 1.0779 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.378 | LR: 6.71e-05 +[2026-04-26 01:18:28] Epoch 1 | Step 4830 | Loss: 0.3726 | LM: 0.3666 | LB: 1.0779 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.378 | LR: 6.45e-05 +[2026-04-26 01:18:35] Epoch 1 | Step 4840 | Loss: 0.3726 | LM: 0.3665 | LB: 1.0778 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.388/SR1: 0.378 | LR: 6.21e-05 +[2026-04-26 01:18:42] Epoch 1 | Step 4850 | Loss: 0.3726 | LM: 0.3666 | LB: 1.0778 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.99e-05 +[2026-04-26 01:18:49] Epoch 1 | Step 4860 | Loss: 0.3727 | LM: 0.3666 | LB: 1.0777 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.79e-05 +[2026-04-26 01:18:56] Epoch 1 | Step 4870 | Loss: 0.3727 | LM: 0.3665 | LB: 1.0777 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.61e-05 +[2026-04-26 01:19:03] Epoch 1 | Step 4880 | Loss: 0.3727 | LM: 0.3665 | LB: 1.0776 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.46e-05 +[2026-04-26 01:19:10] Epoch 1 | Step 4890 | Loss: 0.3726 | LM: 0.3665 | LB: 1.0776 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.32e-05 +[2026-04-26 01:19:18] Epoch 1 | Step 4900 | Loss: 0.3725 | LM: 0.3663 | LB: 1.0776 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.21e-05 +[2026-04-26 01:19:25] Epoch 1 | Step 4910 | Loss: 0.3725 | LM: 0.3662 | LB: 1.0775 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.13e-05 +[2026-04-26 01:19:32] Epoch 1 | Step 4920 | Loss: 0.3724 | LM: 0.3661 | LB: 1.0775 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.06e-05 +[2026-04-26 01:19:39] Epoch 1 | Step 4930 | Loss: 0.3724 | LM: 0.3660 | LB: 1.0774 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.02e-05 +[2026-04-26 01:19:46] Epoch 1 | Step 4940 | Loss: 0.3723 | LM: 0.3660 | LB: 1.0774 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:19:53] Epoch 1 | Step 4950 | Loss: 0.3723 | LM: 0.3661 | LB: 1.0773 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:00] Epoch 1 | Step 4960 | Loss: 0.3723 | LM: 0.3660 | LB: 1.0773 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:07] Epoch 1 | Step 4970 | Loss: 0.3722 | LM: 0.3660 | LB: 1.0773 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:14] Epoch 1 | Step 4980 | Loss: 0.3722 | LM: 0.3659 | LB: 1.0772 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:22] Epoch 1 | Step 4990 | Loss: 0.3720 | LM: 0.3658 | LB: 1.0772 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:29] Epoch 1 | Step 5000 | Loss: 0.3721 | LM: 0.3659 | LB: 1.0771 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:36] Epoch 1 | Step 5010 | Loss: 0.3720 | LM: 0.3659 | LB: 1.0771 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:43] Epoch 1 | Step 5020 | Loss: 0.3719 | LM: 0.3657 | LB: 1.0771 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:50] Epoch 1 | Step 5030 | Loss: 0.3719 | LM: 0.3657 | LB: 1.0771 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:20:57] Epoch 1 | Step 5040 | Loss: 0.3718 | LM: 0.3655 | LB: 1.0770 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:21:04] Epoch 1 | Step 5050 | Loss: 0.3718 | LM: 0.3655 | LB: 1.0770 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.387/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:21:11] Epoch 1 | Step 5060 | Loss: 0.3718 | LM: 0.3653 | LB: 1.0770 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.378 | LR: 5.00e-05 +[2026-04-26 01:21:19] Epoch 1 | Step 5070 | Loss: 0.3717 | LM: 0.3652 | LB: 1.0769 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:21:26] Epoch 1 | Step 5080 | Loss: 0.3717 | LM: 0.3652 | LB: 1.0769 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:21:33] Epoch 1 | Step 5090 | Loss: 0.3717 | LM: 0.3652 | LB: 1.0769 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:21:40] Epoch 1 | Step 5100 | Loss: 0.3716 | LM: 0.3651 | LB: 1.0768 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:21:47] Epoch 1 | Step 5110 | Loss: 0.3715 | LM: 0.3651 | LB: 1.0768 | CL0: 2.9 | CL1: 2.6 | HR0: 0.353/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:21:54] Epoch 1 | Step 5120 | Loss: 0.3716 | LM: 0.3652 | LB: 1.0767 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:01] Epoch 1 | Step 5130 | Loss: 0.3715 | LM: 0.3650 | LB: 1.0767 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:09] Epoch 1 | Step 5140 | Loss: 0.3714 | LM: 0.3650 | LB: 1.0767 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:16] Epoch 1 | Step 5150 | Loss: 0.3713 | LM: 0.3649 | LB: 1.0766 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:23] Epoch 1 | Step 5160 | Loss: 0.3711 | LM: 0.3648 | LB: 1.0766 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:30] Epoch 1 | Step 5170 | Loss: 0.3711 | LM: 0.3648 | LB: 1.0765 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:37] Epoch 1 | Step 5180 | Loss: 0.3710 | LM: 0.3647 | LB: 1.0765 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:44] Epoch 1 | Step 5190 | Loss: 0.3710 | LM: 0.3647 | LB: 1.0765 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:51] Epoch 1 | Step 5200 | Loss: 0.3709 | LM: 0.3646 | LB: 1.0764 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:22:58] Epoch 1 | Step 5210 | Loss: 0.3709 | LM: 0.3645 | LB: 1.0764 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:05] Epoch 1 | Step 5220 | Loss: 0.3709 | LM: 0.3644 | LB: 1.0764 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:13] Epoch 1 | Step 5230 | Loss: 0.3709 | LM: 0.3643 | LB: 1.0763 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:20] Epoch 1 | Step 5240 | Loss: 0.3708 | LM: 0.3641 | LB: 1.0763 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:27] Epoch 1 | Step 5250 | Loss: 0.3708 | LM: 0.3643 | LB: 1.0763 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:34] Epoch 1 | Step 5260 | Loss: 0.3707 | LM: 0.3642 | LB: 1.0762 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.386/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:41] Epoch 1 | Step 5270 | Loss: 0.3707 | LM: 0.3641 | LB: 1.0762 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:48] Epoch 1 | Step 5280 | Loss: 0.3706 | LM: 0.3639 | LB: 1.0762 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:23:55] Epoch 1 | Step 5290 | Loss: 0.3705 | LM: 0.3639 | LB: 1.0761 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:24:02] Epoch 1 | Step 5300 | Loss: 0.3705 | LM: 0.3638 | LB: 1.0761 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.377 | LR: 5.00e-05 +[2026-04-26 01:24:09] Epoch 1 | Step 5310 | Loss: 0.3705 | LM: 0.3638 | LB: 1.0761 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:17] Epoch 1 | Step 5320 | Loss: 0.3704 | LM: 0.3636 | LB: 1.0761 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:24] Epoch 1 | Step 5330 | Loss: 0.3704 | LM: 0.3636 | LB: 1.0760 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:31] Epoch 1 | Step 5340 | Loss: 0.3704 | LM: 0.3636 | LB: 1.0760 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:38] Epoch 1 | Step 5350 | Loss: 0.3703 | LM: 0.3636 | LB: 1.0759 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:45] Epoch 1 | Step 5360 | Loss: 0.3703 | LM: 0.3634 | LB: 1.0759 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.340 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:52] Epoch 1 | Step 5370 | Loss: 0.3702 | LM: 0.3633 | LB: 1.0758 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:24:59] Epoch 1 | Step 5380 | Loss: 0.3701 | LM: 0.3632 | LB: 1.0758 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:06] Epoch 1 | Step 5390 | Loss: 0.3700 | LM: 0.3630 | LB: 1.0757 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:14] Epoch 1 | Step 5400 | Loss: 0.3700 | LM: 0.3630 | LB: 1.0757 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:21] Epoch 1 | Step 5410 | Loss: 0.3699 | LM: 0.3629 | LB: 1.0757 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:28] Epoch 1 | Step 5420 | Loss: 0.3699 | LM: 0.3629 | LB: 1.0757 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:35] Epoch 1 | Step 5430 | Loss: 0.3698 | LM: 0.3629 | LB: 1.0757 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:42] Epoch 1 | Step 5440 | Loss: 0.3698 | LM: 0.3628 | LB: 1.0756 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:49] Epoch 1 | Step 5450 | Loss: 0.3698 | LM: 0.3627 | LB: 1.0756 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:25:56] Epoch 1 | Step 5460 | Loss: 0.3698 | LM: 0.3626 | LB: 1.0755 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:03] Epoch 1 | Step 5470 | Loss: 0.3697 | LM: 0.3625 | LB: 1.0755 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.385/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:10] Epoch 1 | Step 5480 | Loss: 0.3697 | LM: 0.3623 | LB: 1.0755 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:17] Epoch 1 | Step 5490 | Loss: 0.3697 | LM: 0.3624 | LB: 1.0754 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:24] Epoch 1 | Step 5500 | Loss: 0.3697 | LM: 0.3625 | LB: 1.0754 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:32] Epoch 1 | Step 5510 | Loss: 0.3697 | LM: 0.3626 | LB: 1.0753 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:40] Epoch 1 | Step 5520 | Loss: 0.3696 | LM: 0.3625 | LB: 1.0753 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:47] Epoch 1 | Step 5530 | Loss: 0.3696 | LM: 0.3624 | LB: 1.0753 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:26:54] Epoch 1 | Step 5540 | Loss: 0.3694 | LM: 0.3622 | LB: 1.0752 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:27:01] Epoch 1 | Step 5550 | Loss: 0.3694 | LM: 0.3621 | LB: 1.0752 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.376 | LR: 5.00e-05 +[2026-04-26 01:27:08] Epoch 1 | Step 5560 | Loss: 0.3693 | LM: 0.3621 | LB: 1.0752 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:15] Epoch 1 | Step 5570 | Loss: 0.3694 | LM: 0.3621 | LB: 1.0751 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:22] Epoch 1 | Step 5580 | Loss: 0.3693 | LM: 0.3620 | LB: 1.0751 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:29] Epoch 1 | Step 5590 | Loss: 0.3692 | LM: 0.3621 | LB: 1.0751 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:37] Epoch 1 | Step 5600 | Loss: 0.3692 | LM: 0.3620 | LB: 1.0751 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:44] Epoch 1 | Step 5610 | Loss: 0.3692 | LM: 0.3621 | LB: 1.0750 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:51] Epoch 1 | Step 5620 | Loss: 0.3692 | LM: 0.3620 | LB: 1.0750 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:27:58] Epoch 1 | Step 5630 | Loss: 0.3691 | LM: 0.3620 | LB: 1.0750 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:05] Epoch 1 | Step 5640 | Loss: 0.3691 | LM: 0.3621 | LB: 1.0749 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:12] Epoch 1 | Step 5650 | Loss: 0.3691 | LM: 0.3621 | LB: 1.0749 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:19] Epoch 1 | Step 5660 | Loss: 0.3690 | LM: 0.3620 | LB: 1.0749 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:27] Epoch 1 | Step 5670 | Loss: 0.3690 | LM: 0.3620 | LB: 1.0749 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:34] Epoch 1 | Step 5680 | Loss: 0.3688 | LM: 0.3619 | LB: 1.0748 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:41] Epoch 1 | Step 5690 | Loss: 0.3689 | LM: 0.3619 | LB: 1.0748 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:48] Epoch 1 | Step 5700 | Loss: 0.3688 | LM: 0.3619 | LB: 1.0748 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:28:55] Epoch 1 | Step 5710 | Loss: 0.3688 | LM: 0.3618 | LB: 1.0747 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:02] Epoch 1 | Step 5720 | Loss: 0.3688 | LM: 0.3618 | LB: 1.0747 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:09] Epoch 1 | Step 5730 | Loss: 0.3687 | LM: 0.3619 | LB: 1.0747 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.384/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:16] Epoch 1 | Step 5740 | Loss: 0.3687 | LM: 0.3618 | LB: 1.0746 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:24] Epoch 1 | Step 5750 | Loss: 0.3687 | LM: 0.3618 | LB: 1.0746 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:31] Epoch 1 | Step 5760 | Loss: 0.3687 | LM: 0.3617 | LB: 1.0746 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:38] Epoch 1 | Step 5770 | Loss: 0.3687 | LM: 0.3617 | LB: 1.0746 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:45] Epoch 1 | Step 5780 | Loss: 0.3687 | LM: 0.3617 | LB: 1.0745 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:52] Epoch 1 | Step 5790 | Loss: 0.3687 | LM: 0.3615 | LB: 1.0745 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:29:59] Epoch 1 | Step 5800 | Loss: 0.3687 | LM: 0.3616 | LB: 1.0744 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:07] Epoch 1 | Step 5810 | Loss: 0.3687 | LM: 0.3617 | LB: 1.0744 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:14] Epoch 1 | Step 5820 | Loss: 0.3686 | LM: 0.3615 | LB: 1.0744 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:21] Epoch 1 | Step 5830 | Loss: 0.3685 | LM: 0.3614 | LB: 1.0744 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:28] Epoch 1 | Step 5840 | Loss: 0.3685 | LM: 0.3614 | LB: 1.0743 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:35] Epoch 1 | Step 5850 | Loss: 0.3685 | LM: 0.3616 | LB: 1.0743 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:42] Epoch 1 | Step 5860 | Loss: 0.3684 | LM: 0.3614 | LB: 1.0743 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:50] Epoch 1 | Step 5870 | Loss: 0.3684 | LM: 0.3614 | LB: 1.0743 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:30:57] Epoch 1 | Step 5880 | Loss: 0.3684 | LM: 0.3615 | LB: 1.0743 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.375 | LR: 5.00e-05 +[2026-04-26 01:31:04] Epoch 1 | Step 5890 | Loss: 0.3684 | LM: 0.3614 | LB: 1.0742 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:11] Epoch 1 | Step 5900 | Loss: 0.3683 | LM: 0.3614 | LB: 1.0742 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:18] Epoch 1 | Step 5910 | Loss: 0.3683 | LM: 0.3614 | LB: 1.0742 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:25] Epoch 1 | Step 5920 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0742 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:33] Epoch 1 | Step 5930 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0741 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:40] Epoch 1 | Step 5940 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0741 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:47] Epoch 1 | Step 5950 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0741 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:31:54] Epoch 1 | Step 5960 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0741 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:01] Epoch 1 | Step 5970 | Loss: 0.3682 | LM: 0.3613 | LB: 1.0740 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:08] Epoch 1 | Step 5980 | Loss: 0.3682 | LM: 0.3613 | LB: 1.0740 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:15] Epoch 1 | Step 5990 | Loss: 0.3682 | LM: 0.3614 | LB: 1.0740 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:23] Epoch 1 | Step 6000 | Loss: 0.3681 | LM: 0.3613 | LB: 1.0740 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:23] Validation | Batch 10/42 | Loss: 0.3375 | LM_LOSS: 0.3270 | LB_LOSS: 1.0515 +[2026-04-26 01:32:24] Validation | Batch 20/42 | Loss: 0.3568 | LM_LOSS: 0.3462 | LB_LOSS: 1.0552 +[2026-04-26 01:32:26] Validation | Batch 30/42 | Loss: 0.3470 | LM_LOSS: 0.3365 | LB_LOSS: 1.0541 +[2026-04-26 01:32:27] Validation | Batch 40/42 | Loss: 0.3531 | LM_LOSS: 0.3426 | LB_LOSS: 1.0537 +[2026-04-26 01:32:27] Validation | Batch 42/42 | Loss: 0.3528 | LM_LOSS: 0.3423 | LB_LOSS: 1.0540 +[2026-04-26 01:32:27] Validation | Loss: 0.3528 | LM_LOSS: 0.3423 | LB_LOSS: 1.0540 | PPL: 1.40 | Time: 4.47s +[2026-04-26 01:32:30] New best model saved! Val loss: 0.3528 +[2026-04-26 01:32:37] Epoch 1 | Step 6010 | Loss: 0.3681 | LM: 0.3612 | LB: 1.0739 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:44] Epoch 1 | Step 6020 | Loss: 0.3681 | LM: 0.3611 | LB: 1.0739 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.383/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:51] Epoch 1 | Step 6030 | Loss: 0.3681 | LM: 0.3609 | LB: 1.0738 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:32:59] Epoch 1 | Step 6040 | Loss: 0.3681 | LM: 0.3609 | LB: 1.0738 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:06] Epoch 1 | Step 6050 | Loss: 0.3681 | LM: 0.3610 | LB: 1.0738 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:13] Epoch 1 | Step 6060 | Loss: 0.3681 | LM: 0.3610 | LB: 1.0738 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:20] Epoch 1 | Step 6070 | Loss: 0.3680 | LM: 0.3609 | LB: 1.0737 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:27] Epoch 1 | Step 6080 | Loss: 0.3680 | LM: 0.3608 | LB: 1.0737 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:34] Epoch 1 | Step 6090 | Loss: 0.3680 | LM: 0.3608 | LB: 1.0737 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:41] Epoch 1 | Step 6100 | Loss: 0.3680 | LM: 0.3608 | LB: 1.0736 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:48] Epoch 1 | Step 6110 | Loss: 0.3680 | LM: 0.3608 | LB: 1.0736 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:33:56] Epoch 1 | Step 6120 | Loss: 0.3679 | LM: 0.3608 | LB: 1.0736 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:03] Epoch 1 | Step 6130 | Loss: 0.3678 | LM: 0.3607 | LB: 1.0735 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:10] Epoch 1 | Step 6140 | Loss: 0.3677 | LM: 0.3605 | LB: 1.0735 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:17] Epoch 1 | Step 6150 | Loss: 0.3676 | LM: 0.3604 | LB: 1.0735 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:24] Epoch 1 | Step 6160 | Loss: 0.3676 | LM: 0.3604 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:31] Epoch 1 | Step 6170 | Loss: 0.3676 | LM: 0.3602 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:38] Epoch 1 | Step 6180 | Loss: 0.3675 | LM: 0.3603 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:45] Epoch 1 | Step 6190 | Loss: 0.3674 | LM: 0.3603 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:34:53] Epoch 1 | Step 6200 | Loss: 0.3674 | LM: 0.3602 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:35:00] Epoch 1 | Step 6210 | Loss: 0.3674 | LM: 0.3600 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:35:07] Epoch 1 | Step 6220 | Loss: 0.3674 | LM: 0.3601 | LB: 1.0734 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:35:14] Epoch 1 | Step 6230 | Loss: 0.3673 | LM: 0.3602 | LB: 1.0733 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.374 | LR: 5.00e-05 +[2026-04-26 01:35:21] Epoch 1 | Step 6240 | Loss: 0.3673 | LM: 0.3601 | LB: 1.0733 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:35:28] Epoch 1 | Step 6250 | Loss: 0.3672 | LM: 0.3601 | LB: 1.0733 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:35:35] Epoch 1 | Step 6260 | Loss: 0.3672 | LM: 0.3602 | LB: 1.0733 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:35:43] Epoch 1 | Step 6270 | Loss: 0.3671 | LM: 0.3601 | LB: 1.0732 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:35:50] Epoch 1 | Step 6280 | Loss: 0.3670 | LM: 0.3598 | LB: 1.0732 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:35:57] Epoch 1 | Step 6290 | Loss: 0.3670 | LM: 0.3598 | LB: 1.0732 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:04] Epoch 1 | Step 6300 | Loss: 0.3669 | LM: 0.3598 | LB: 1.0732 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:11] Epoch 1 | Step 6310 | Loss: 0.3669 | LM: 0.3598 | LB: 1.0731 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.382/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:18] Epoch 1 | Step 6320 | Loss: 0.3668 | LM: 0.3598 | LB: 1.0731 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:25] Epoch 1 | Step 6330 | Loss: 0.3669 | LM: 0.3599 | LB: 1.0731 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:33] Epoch 1 | Step 6340 | Loss: 0.3669 | LM: 0.3599 | LB: 1.0730 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:40] Epoch 1 | Step 6350 | Loss: 0.3669 | LM: 0.3599 | LB: 1.0730 | CL0: 2.9 | CL1: 2.6 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:47] Epoch 1 | Step 6360 | Loss: 0.3669 | LM: 0.3600 | LB: 1.0730 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:36:54] Epoch 1 | Step 6370 | Loss: 0.3669 | LM: 0.3599 | LB: 1.0730 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:01] Epoch 1 | Step 6380 | Loss: 0.3668 | LM: 0.3598 | LB: 1.0729 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:08] Epoch 1 | Step 6390 | Loss: 0.3668 | LM: 0.3597 | LB: 1.0729 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.339 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:15] Epoch 1 | Step 6400 | Loss: 0.3668 | LM: 0.3596 | LB: 1.0729 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:22] Epoch 1 | Step 6410 | Loss: 0.3667 | LM: 0.3596 | LB: 1.0729 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:29] Epoch 1 | Step 6420 | Loss: 0.3667 | LM: 0.3595 | LB: 1.0729 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:37] Epoch 1 | Step 6430 | Loss: 0.3666 | LM: 0.3595 | LB: 1.0728 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:44] Epoch 1 | Step 6440 | Loss: 0.3666 | LM: 0.3594 | LB: 1.0728 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:51] Epoch 1 | Step 6450 | Loss: 0.3666 | LM: 0.3594 | LB: 1.0728 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:37:58] Epoch 1 | Step 6460 | Loss: 0.3665 | LM: 0.3592 | LB: 1.0727 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:05] Epoch 1 | Step 6470 | Loss: 0.3665 | LM: 0.3591 | LB: 1.0727 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:12] Epoch 1 | Step 6480 | Loss: 0.3665 | LM: 0.3592 | LB: 1.0727 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:19] Epoch 1 | Step 6490 | Loss: 0.3666 | LM: 0.3593 | LB: 1.0727 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:26] Epoch 1 | Step 6500 | Loss: 0.3665 | LM: 0.3591 | LB: 1.0726 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:34] Epoch 1 | Step 6510 | Loss: 0.3664 | LM: 0.3591 | LB: 1.0726 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:41] Epoch 1 | Step 6520 | Loss: 0.3664 | LM: 0.3591 | LB: 1.0726 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:48] Epoch 1 | Step 6530 | Loss: 0.3662 | LM: 0.3590 | LB: 1.0726 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:38:55] Epoch 1 | Step 6540 | Loss: 0.3662 | LM: 0.3590 | LB: 1.0725 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:02] Epoch 1 | Step 6550 | Loss: 0.3662 | LM: 0.3587 | LB: 1.0725 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:09] Epoch 1 | Step 6560 | Loss: 0.3661 | LM: 0.3586 | LB: 1.0725 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:16] Epoch 1 | Step 6570 | Loss: 0.3661 | LM: 0.3587 | LB: 1.0725 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:24] Epoch 1 | Step 6580 | Loss: 0.3661 | LM: 0.3587 | LB: 1.0725 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:31] Epoch 1 | Step 6590 | Loss: 0.3660 | LM: 0.3587 | LB: 1.0724 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:38] Epoch 1 | Step 6600 | Loss: 0.3660 | LM: 0.3586 | LB: 1.0724 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:45] Epoch 1 | Step 6610 | Loss: 0.3660 | LM: 0.3586 | LB: 1.0724 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.373 | LR: 5.00e-05 +[2026-04-26 01:39:52] Epoch 1 | Step 6620 | Loss: 0.3659 | LM: 0.3584 | LB: 1.0724 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:39:59] Epoch 1 | Step 6630 | Loss: 0.3659 | LM: 0.3583 | LB: 1.0724 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:06] Epoch 1 | Step 6640 | Loss: 0.3659 | LM: 0.3584 | LB: 1.0723 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:13] Epoch 1 | Step 6650 | Loss: 0.3659 | LM: 0.3583 | LB: 1.0723 | CL0: 2.9 | CL1: 2.7 | HR0: 0.352/SR0: 0.338 | HR1: 0.381/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:20] Epoch 1 | Step 6660 | Loss: 0.3657 | LM: 0.3582 | LB: 1.0723 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:27] Epoch 1 | Step 6670 | Loss: 0.3657 | LM: 0.3581 | LB: 1.0722 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:34] Epoch 1 | Step 6680 | Loss: 0.3657 | LM: 0.3579 | LB: 1.0722 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:41] Epoch 1 | Step 6690 | Loss: 0.3657 | LM: 0.3579 | LB: 1.0722 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:48] Epoch 1 | Step 6700 | Loss: 0.3656 | LM: 0.3578 | LB: 1.0722 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:40:56] Epoch 1 | Step 6710 | Loss: 0.3656 | LM: 0.3578 | LB: 1.0721 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:03] Epoch 1 | Step 6720 | Loss: 0.3656 | LM: 0.3577 | LB: 1.0721 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:10] Epoch 1 | Step 6730 | Loss: 0.3656 | LM: 0.3577 | LB: 1.0721 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:17] Epoch 1 | Step 6740 | Loss: 0.3656 | LM: 0.3577 | LB: 1.0721 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:24] Epoch 1 | Step 6750 | Loss: 0.3655 | LM: 0.3577 | LB: 1.0721 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:31] Epoch 1 | Step 6760 | Loss: 0.3655 | LM: 0.3578 | LB: 1.0720 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:38] Epoch 1 | Step 6770 | Loss: 0.3655 | LM: 0.3577 | LB: 1.0720 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:45] Epoch 1 | Step 6780 | Loss: 0.3655 | LM: 0.3577 | LB: 1.0720 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:52] Epoch 1 | Step 6790 | Loss: 0.3655 | LM: 0.3577 | LB: 1.0720 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:41:59] Epoch 1 | Step 6800 | Loss: 0.3656 | LM: 0.3576 | LB: 1.0720 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:06] Epoch 1 | Step 6810 | Loss: 0.3656 | LM: 0.3576 | LB: 1.0719 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:13] Epoch 1 | Step 6820 | Loss: 0.3656 | LM: 0.3575 | LB: 1.0719 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:20] Epoch 1 | Step 6830 | Loss: 0.3657 | LM: 0.3576 | LB: 1.0719 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:27] Epoch 1 | Step 6840 | Loss: 0.3657 | LM: 0.3575 | LB: 1.0719 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:35] Epoch 1 | Step 6850 | Loss: 0.3657 | LM: 0.3574 | LB: 1.0719 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:42] Epoch 1 | Step 6860 | Loss: 0.3657 | LM: 0.3575 | LB: 1.0718 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:49] Epoch 1 | Step 6870 | Loss: 0.3656 | LM: 0.3574 | LB: 1.0718 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:42:56] Epoch 1 | Step 6880 | Loss: 0.3656 | LM: 0.3574 | LB: 1.0718 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:03] Epoch 1 | Step 6890 | Loss: 0.3656 | LM: 0.3573 | LB: 1.0718 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:10] Epoch 1 | Step 6900 | Loss: 0.3656 | LM: 0.3574 | LB: 1.0717 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:17] Epoch 1 | Step 6910 | Loss: 0.3655 | LM: 0.3573 | LB: 1.0717 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:24] Epoch 1 | Step 6920 | Loss: 0.3655 | LM: 0.3573 | LB: 1.0717 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:31] Epoch 1 | Step 6930 | Loss: 0.3655 | LM: 0.3573 | LB: 1.0717 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:38] Epoch 1 | Step 6940 | Loss: 0.3654 | LM: 0.3572 | LB: 1.0716 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:45] Epoch 1 | Step 6950 | Loss: 0.3654 | LM: 0.3572 | LB: 1.0716 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:52] Epoch 1 | Step 6960 | Loss: 0.3654 | LM: 0.3572 | LB: 1.0716 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:43:59] Epoch 1 | Step 6970 | Loss: 0.3653 | LM: 0.3571 | LB: 1.0716 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:44:06] Epoch 1 | Step 6980 | Loss: 0.3653 | LM: 0.3571 | LB: 1.0716 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.380/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:44:14] Epoch 1 | Step 6990 | Loss: 0.3652 | LM: 0.3571 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:44:21] Epoch 1 | Step 7000 | Loss: 0.3652 | LM: 0.3572 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:44:28] Epoch 1 | Step 7010 | Loss: 0.3651 | LM: 0.3572 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.372 | LR: 5.00e-05 +[2026-04-26 01:44:35] Epoch 1 | Step 7020 | Loss: 0.3651 | LM: 0.3572 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:44:42] Epoch 1 | Step 7030 | Loss: 0.3651 | LM: 0.3571 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:44:49] Epoch 1 | Step 7040 | Loss: 0.3651 | LM: 0.3571 | LB: 1.0715 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:44:56] Epoch 1 | Step 7050 | Loss: 0.3650 | LM: 0.3572 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:04] Epoch 1 | Step 7060 | Loss: 0.3650 | LM: 0.3572 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:11] Epoch 1 | Step 7070 | Loss: 0.3650 | LM: 0.3572 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:18] Epoch 1 | Step 7080 | Loss: 0.3650 | LM: 0.3571 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:25] Epoch 1 | Step 7090 | Loss: 0.3650 | LM: 0.3573 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:32] Epoch 1 | Step 7100 | Loss: 0.3649 | LM: 0.3571 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:39] Epoch 1 | Step 7110 | Loss: 0.3648 | LM: 0.3570 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:46] Epoch 1 | Step 7120 | Loss: 0.3649 | LM: 0.3569 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:45:53] Epoch 1 | Step 7130 | Loss: 0.3648 | LM: 0.3569 | LB: 1.0714 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:00] Epoch 1 | Step 7140 | Loss: 0.3648 | LM: 0.3569 | LB: 1.0713 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:07] Epoch 1 | Step 7150 | Loss: 0.3648 | LM: 0.3568 | LB: 1.0713 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:14] Epoch 1 | Step 7160 | Loss: 0.3647 | LM: 0.3566 | LB: 1.0713 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:21] Epoch 1 | Step 7170 | Loss: 0.3647 | LM: 0.3567 | LB: 1.0713 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:28] Epoch 1 | Step 7180 | Loss: 0.3647 | LM: 0.3566 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:35] Epoch 1 | Step 7190 | Loss: 0.3648 | LM: 0.3567 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:43] Epoch 1 | Step 7200 | Loss: 0.3647 | LM: 0.3566 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:50] Epoch 1 | Step 7210 | Loss: 0.3646 | LM: 0.3565 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:46:57] Epoch 1 | Step 7220 | Loss: 0.3647 | LM: 0.3566 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:04] Epoch 1 | Step 7230 | Loss: 0.3646 | LM: 0.3566 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:11] Epoch 1 | Step 7240 | Loss: 0.3646 | LM: 0.3564 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:18] Epoch 1 | Step 7250 | Loss: 0.3646 | LM: 0.3564 | LB: 1.0712 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:25] Epoch 1 | Step 7260 | Loss: 0.3645 | LM: 0.3562 | LB: 1.0711 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:32] Epoch 1 | Step 7270 | Loss: 0.3645 | LM: 0.3563 | LB: 1.0711 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:39] Epoch 1 | Step 7280 | Loss: 0.3645 | LM: 0.3563 | LB: 1.0711 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:46] Epoch 1 | Step 7290 | Loss: 0.3645 | LM: 0.3562 | LB: 1.0711 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:47:53] Epoch 1 | Step 7300 | Loss: 0.3644 | LM: 0.3562 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:00] Epoch 1 | Step 7310 | Loss: 0.3644 | LM: 0.3561 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:07] Epoch 1 | Step 7320 | Loss: 0.3643 | LM: 0.3562 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:14] Epoch 1 | Step 7330 | Loss: 0.3643 | LM: 0.3561 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:22] Epoch 1 | Step 7340 | Loss: 0.3644 | LM: 0.3562 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:29] Epoch 1 | Step 7350 | Loss: 0.3644 | LM: 0.3563 | LB: 1.0710 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:36] Epoch 1 | Step 7360 | Loss: 0.3643 | LM: 0.3562 | LB: 1.0709 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:43] Epoch 1 | Step 7370 | Loss: 0.3642 | LM: 0.3561 | LB: 1.0709 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:50] Epoch 1 | Step 7380 | Loss: 0.3642 | LM: 0.3560 | LB: 1.0709 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:48:57] Epoch 1 | Step 7390 | Loss: 0.3641 | LM: 0.3560 | LB: 1.0709 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:04] Epoch 1 | Step 7400 | Loss: 0.3641 | LM: 0.3559 | LB: 1.0709 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:11] Epoch 1 | Step 7410 | Loss: 0.3641 | LM: 0.3558 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:18] Epoch 1 | Step 7420 | Loss: 0.3641 | LM: 0.3559 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:25] Epoch 1 | Step 7430 | Loss: 0.3641 | LM: 0.3558 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:32] Epoch 1 | Step 7440 | Loss: 0.3640 | LM: 0.3558 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:39] Epoch 1 | Step 7450 | Loss: 0.3640 | LM: 0.3556 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:46] Epoch 1 | Step 7460 | Loss: 0.3639 | LM: 0.3555 | LB: 1.0708 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.379/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:49:53] Epoch 1 | Step 7470 | Loss: 0.3639 | LM: 0.3554 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:01] Epoch 1 | Step 7480 | Loss: 0.3639 | LM: 0.3553 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:08] Epoch 1 | Step 7490 | Loss: 0.3639 | LM: 0.3553 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:15] Epoch 1 | Step 7500 | Loss: 0.3639 | LM: 0.3554 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:22] Epoch 1 | Step 7510 | Loss: 0.3640 | LM: 0.3553 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:29] Epoch 1 | Step 7520 | Loss: 0.3639 | LM: 0.3553 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:36] Epoch 1 | Step 7530 | Loss: 0.3638 | LM: 0.3552 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:43] Epoch 1 | Step 7540 | Loss: 0.3638 | LM: 0.3551 | LB: 1.0707 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:50] Epoch 1 | Step 7550 | Loss: 0.3638 | LM: 0.3552 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:50:57] Epoch 1 | Step 7560 | Loss: 0.3637 | LM: 0.3550 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:51:04] Epoch 1 | Step 7570 | Loss: 0.3637 | LM: 0.3551 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:51:12] Epoch 1 | Step 7580 | Loss: 0.3637 | LM: 0.3551 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.371 | LR: 5.00e-05 +[2026-04-26 01:51:19] Epoch 1 | Step 7590 | Loss: 0.3637 | LM: 0.3550 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:51:26] Epoch 1 | Step 7600 | Loss: 0.3636 | LM: 0.3551 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:51:33] Epoch 1 | Step 7610 | Loss: 0.3636 | LM: 0.3552 | LB: 1.0706 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:51:40] Epoch 1 | Step 7620 | Loss: 0.3635 | LM: 0.3551 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:51:47] Epoch 1 | Step 7630 | Loss: 0.3635 | LM: 0.3550 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:51:54] Epoch 1 | Step 7640 | Loss: 0.3634 | LM: 0.3549 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:01] Epoch 1 | Step 7650 | Loss: 0.3634 | LM: 0.3548 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:08] Epoch 1 | Step 7660 | Loss: 0.3633 | LM: 0.3548 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:15] Epoch 1 | Step 7670 | Loss: 0.3633 | LM: 0.3547 | LB: 1.0705 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:22] Epoch 1 | Step 7680 | Loss: 0.3632 | LM: 0.3547 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:30] Epoch 1 | Step 7690 | Loss: 0.3632 | LM: 0.3547 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:37] Epoch 1 | Step 7700 | Loss: 0.3632 | LM: 0.3547 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:44] Epoch 1 | Step 7710 | Loss: 0.3631 | LM: 0.3545 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:51] Epoch 1 | Step 7720 | Loss: 0.3632 | LM: 0.3545 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:52:58] Epoch 1 | Step 7730 | Loss: 0.3632 | LM: 0.3545 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:05] Epoch 1 | Step 7740 | Loss: 0.3632 | LM: 0.3546 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:12] Epoch 1 | Step 7750 | Loss: 0.3632 | LM: 0.3546 | LB: 1.0704 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:19] Epoch 1 | Step 7760 | Loss: 0.3631 | LM: 0.3546 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:26] Epoch 1 | Step 7770 | Loss: 0.3631 | LM: 0.3544 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:33] Epoch 1 | Step 7780 | Loss: 0.3630 | LM: 0.3543 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:40] Epoch 1 | Step 7790 | Loss: 0.3630 | LM: 0.3543 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:47] Epoch 1 | Step 7800 | Loss: 0.3630 | LM: 0.3543 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:53:54] Epoch 1 | Step 7810 | Loss: 0.3630 | LM: 0.3543 | LB: 1.0703 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:02] Epoch 1 | Step 7820 | Loss: 0.3630 | LM: 0.3543 | LB: 1.0702 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:09] Epoch 1 | Step 7830 | Loss: 0.3629 | LM: 0.3543 | LB: 1.0702 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:16] Epoch 1 | Step 7840 | Loss: 0.3629 | LM: 0.3541 | LB: 1.0702 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:23] Epoch 1 | Step 7850 | Loss: 0.3628 | LM: 0.3540 | LB: 1.0702 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:30] Epoch 1 | Step 7860 | Loss: 0.3628 | LM: 0.3539 | LB: 1.0702 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:37] Epoch 1 | Step 7870 | Loss: 0.3627 | LM: 0.3539 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:44] Epoch 1 | Step 7880 | Loss: 0.3627 | LM: 0.3539 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:51] Epoch 1 | Step 7890 | Loss: 0.3627 | LM: 0.3538 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:54:58] Epoch 1 | Step 7900 | Loss: 0.3627 | LM: 0.3538 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:05] Epoch 1 | Step 7910 | Loss: 0.3627 | LM: 0.3539 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:12] Epoch 1 | Step 7920 | Loss: 0.3627 | LM: 0.3540 | LB: 1.0701 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:19] Epoch 1 | Step 7930 | Loss: 0.3627 | LM: 0.3540 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:26] Epoch 1 | Step 7940 | Loss: 0.3627 | LM: 0.3539 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:33] Epoch 1 | Step 7950 | Loss: 0.3627 | LM: 0.3540 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.378/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:41] Epoch 1 | Step 7960 | Loss: 0.3627 | LM: 0.3540 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:48] Epoch 1 | Step 7970 | Loss: 0.3627 | LM: 0.3540 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:55:55] Epoch 1 | Step 7980 | Loss: 0.3626 | LM: 0.3539 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:02] Epoch 1 | Step 7990 | Loss: 0.3626 | LM: 0.3539 | LB: 1.0700 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:09] Epoch 1 | Step 8000 | Loss: 0.3626 | LM: 0.3538 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:10] Validation | Batch 10/42 | Loss: 0.3336 | LM_LOSS: 0.3230 | LB_LOSS: 1.0526 +[2026-04-26 01:56:11] Validation | Batch 20/42 | Loss: 0.3531 | LM_LOSS: 0.3426 | LB_LOSS: 1.0564 +[2026-04-26 01:56:12] Validation | Batch 30/42 | Loss: 0.3430 | LM_LOSS: 0.3325 | LB_LOSS: 1.0553 +[2026-04-26 01:56:13] Validation | Batch 40/42 | Loss: 0.3490 | LM_LOSS: 0.3385 | LB_LOSS: 1.0550 +[2026-04-26 01:56:13] Validation | Batch 42/42 | Loss: 0.3487 | LM_LOSS: 0.3382 | LB_LOSS: 1.0553 +[2026-04-26 01:56:13] Validation | Loss: 0.3487 | LM_LOSS: 0.3382 | LB_LOSS: 1.0553 | PPL: 1.40 | Time: 4.52s +[2026-04-26 01:56:16] New best model saved! Val loss: 0.3487 +[2026-04-26 01:56:23] Epoch 1 | Step 8010 | Loss: 0.3625 | LM: 0.3537 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:31] Epoch 1 | Step 8020 | Loss: 0.3624 | LM: 0.3536 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:38] Epoch 1 | Step 8030 | Loss: 0.3624 | LM: 0.3534 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:45] Epoch 1 | Step 8040 | Loss: 0.3624 | LM: 0.3534 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:52] Epoch 1 | Step 8050 | Loss: 0.3623 | LM: 0.3532 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:56:59] Epoch 1 | Step 8060 | Loss: 0.3623 | LM: 0.3532 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:06] Epoch 1 | Step 8070 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:13] Epoch 1 | Step 8080 | Loss: 0.3623 | LM: 0.3532 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:20] Epoch 1 | Step 8090 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:27] Epoch 1 | Step 8100 | Loss: 0.3621 | LM: 0.3531 | LB: 1.0699 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:34] Epoch 1 | Step 8110 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:41] Epoch 1 | Step 8120 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:49] Epoch 1 | Step 8130 | Loss: 0.3621 | LM: 0.3531 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:57:56] Epoch 1 | Step 8140 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:03] Epoch 1 | Step 8150 | Loss: 0.3622 | LM: 0.3531 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:10] Epoch 1 | Step 8160 | Loss: 0.3621 | LM: 0.3530 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.338 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:17] Epoch 1 | Step 8170 | Loss: 0.3621 | LM: 0.3529 | LB: 1.0698 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:24] Epoch 1 | Step 8180 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:31] Epoch 1 | Step 8190 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:38] Epoch 1 | Step 8200 | Loss: 0.3620 | LM: 0.3531 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:45] Epoch 1 | Step 8210 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:52] Epoch 1 | Step 8220 | Loss: 0.3619 | LM: 0.3530 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.370 | LR: 5.00e-05 +[2026-04-26 01:58:59] Epoch 1 | Step 8230 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0697 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:06] Epoch 1 | Step 8240 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:13] Epoch 1 | Step 8250 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:21] Epoch 1 | Step 8260 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:28] Epoch 1 | Step 8270 | Loss: 0.3620 | LM: 0.3530 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:35] Epoch 1 | Step 8280 | Loss: 0.3620 | LM: 0.3529 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:42] Epoch 1 | Step 8290 | Loss: 0.3620 | LM: 0.3529 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:49] Epoch 1 | Step 8300 | Loss: 0.3620 | LM: 0.3528 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 01:59:56] Epoch 1 | Step 8310 | Loss: 0.3620 | LM: 0.3528 | LB: 1.0696 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:03] Epoch 1 | Step 8320 | Loss: 0.3620 | LM: 0.3527 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:10] Epoch 1 | Step 8330 | Loss: 0.3620 | LM: 0.3528 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:17] Epoch 1 | Step 8340 | Loss: 0.3619 | LM: 0.3529 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:25] Epoch 1 | Step 8350 | Loss: 0.3619 | LM: 0.3529 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:32] Epoch 1 | Step 8360 | Loss: 0.3618 | LM: 0.3529 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:39] Epoch 1 | Step 8370 | Loss: 0.3618 | LM: 0.3529 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:46] Epoch 1 | Step 8380 | Loss: 0.3618 | LM: 0.3528 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:00:53] Epoch 1 | Step 8390 | Loss: 0.3618 | LM: 0.3529 | LB: 1.0695 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:00] Epoch 1 | Step 8400 | Loss: 0.3618 | LM: 0.3528 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:07] Epoch 1 | Step 8410 | Loss: 0.3618 | LM: 0.3528 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:14] Epoch 1 | Step 8420 | Loss: 0.3618 | LM: 0.3528 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:22] Epoch 1 | Step 8430 | Loss: 0.3617 | LM: 0.3527 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:29] Epoch 1 | Step 8440 | Loss: 0.3617 | LM: 0.3527 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:36] Epoch 1 | Step 8450 | Loss: 0.3617 | LM: 0.3526 | LB: 1.0694 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:43] Epoch 1 | Step 8460 | Loss: 0.3617 | LM: 0.3527 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:50] Epoch 1 | Step 8470 | Loss: 0.3616 | LM: 0.3526 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:01:57] Epoch 1 | Step 8480 | Loss: 0.3616 | LM: 0.3526 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:04] Epoch 1 | Step 8490 | Loss: 0.3616 | LM: 0.3526 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:11] Epoch 1 | Step 8500 | Loss: 0.3616 | LM: 0.3526 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:19] Epoch 1 | Step 8510 | Loss: 0.3615 | LM: 0.3525 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:26] Epoch 1 | Step 8520 | Loss: 0.3615 | LM: 0.3524 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:33] Epoch 1 | Step 8530 | Loss: 0.3615 | LM: 0.3524 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:40] Epoch 1 | Step 8540 | Loss: 0.3615 | LM: 0.3525 | LB: 1.0693 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:47] Epoch 1 | Step 8550 | Loss: 0.3615 | LM: 0.3525 | LB: 1.0692 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:02:54] Epoch 1 | Step 8560 | Loss: 0.3615 | LM: 0.3524 | LB: 1.0692 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:01] Epoch 1 | Step 8570 | Loss: 0.3615 | LM: 0.3524 | LB: 1.0692 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.377/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:08] Epoch 1 | Step 8580 | Loss: 0.3614 | LM: 0.3523 | LB: 1.0692 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:15] Epoch 1 | Step 8590 | Loss: 0.3614 | LM: 0.3522 | LB: 1.0692 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:22] Epoch 1 | Step 8600 | Loss: 0.3613 | LM: 0.3521 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:30] Epoch 1 | Step 8610 | Loss: 0.3613 | LM: 0.3521 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:37] Epoch 1 | Step 8620 | Loss: 0.3613 | LM: 0.3520 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:44] Epoch 1 | Step 8630 | Loss: 0.3612 | LM: 0.3519 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:51] Epoch 1 | Step 8640 | Loss: 0.3612 | LM: 0.3520 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:03:58] Epoch 1 | Step 8650 | Loss: 0.3612 | LM: 0.3519 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:05] Epoch 1 | Step 8660 | Loss: 0.3612 | LM: 0.3518 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:12] Epoch 1 | Step 8670 | Loss: 0.3612 | LM: 0.3518 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:19] Epoch 1 | Step 8680 | Loss: 0.3612 | LM: 0.3518 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:26] Epoch 1 | Step 8690 | Loss: 0.3611 | LM: 0.3517 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:33] Epoch 1 | Step 8700 | Loss: 0.3611 | LM: 0.3517 | LB: 1.0691 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:41] Epoch 1 | Step 8710 | Loss: 0.3610 | LM: 0.3516 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:48] Epoch 1 | Step 8720 | Loss: 0.3610 | LM: 0.3516 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:04:55] Epoch 1 | Step 8730 | Loss: 0.3610 | LM: 0.3516 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:02] Epoch 1 | Step 8740 | Loss: 0.3610 | LM: 0.3516 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:09] Epoch 1 | Step 8750 | Loss: 0.3610 | LM: 0.3516 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:16] Epoch 1 | Step 8760 | Loss: 0.3609 | LM: 0.3517 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:23] Epoch 1 | Step 8770 | Loss: 0.3609 | LM: 0.3517 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:30] Epoch 1 | Step 8780 | Loss: 0.3608 | LM: 0.3517 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:37] Epoch 1 | Step 8790 | Loss: 0.3608 | LM: 0.3518 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:44] Epoch 1 | Step 8800 | Loss: 0.3608 | LM: 0.3517 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:51] Epoch 1 | Step 8810 | Loss: 0.3607 | LM: 0.3517 | LB: 1.0690 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:05:58] Epoch 1 | Step 8820 | Loss: 0.3607 | LM: 0.3517 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:05] Epoch 1 | Step 8830 | Loss: 0.3607 | LM: 0.3518 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:12] Epoch 1 | Step 8840 | Loss: 0.3606 | LM: 0.3518 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:20] Epoch 1 | Step 8850 | Loss: 0.3606 | LM: 0.3517 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:27] Epoch 1 | Step 8860 | Loss: 0.3606 | LM: 0.3517 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:34] Epoch 1 | Step 8870 | Loss: 0.3606 | LM: 0.3518 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:41] Epoch 1 | Step 8880 | Loss: 0.3606 | LM: 0.3517 | LB: 1.0689 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:48] Epoch 1 | Step 8890 | Loss: 0.3605 | LM: 0.3517 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:06:55] Epoch 1 | Step 8900 | Loss: 0.3604 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:02] Epoch 1 | Step 8910 | Loss: 0.3605 | LM: 0.3518 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:09] Epoch 1 | Step 8920 | Loss: 0.3604 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:17] Epoch 1 | Step 8930 | Loss: 0.3603 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:24] Epoch 1 | Step 8940 | Loss: 0.3603 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:31] Epoch 1 | Step 8950 | Loss: 0.3603 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.369 | LR: 5.00e-05 +[2026-04-26 02:07:38] Epoch 1 | Step 8960 | Loss: 0.3603 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:07:45] Epoch 1 | Step 8970 | Loss: 0.3603 | LM: 0.3516 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:07:52] Epoch 1 | Step 8980 | Loss: 0.3603 | LM: 0.3515 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:07:59] Epoch 1 | Step 8990 | Loss: 0.3602 | LM: 0.3514 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:06] Epoch 1 | Step 9000 | Loss: 0.3602 | LM: 0.3515 | LB: 1.0688 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:13] Epoch 1 | Step 9010 | Loss: 0.3602 | LM: 0.3514 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:20] Epoch 1 | Step 9020 | Loss: 0.3602 | LM: 0.3514 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:27] Epoch 1 | Step 9030 | Loss: 0.3602 | LM: 0.3514 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:34] Epoch 1 | Step 9040 | Loss: 0.3602 | LM: 0.3513 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:42] Epoch 1 | Step 9050 | Loss: 0.3601 | LM: 0.3513 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:49] Epoch 1 | Step 9060 | Loss: 0.3601 | LM: 0.3512 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:08:56] Epoch 1 | Step 9070 | Loss: 0.3601 | LM: 0.3512 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:03] Epoch 1 | Step 9080 | Loss: 0.3601 | LM: 0.3512 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:10] Epoch 1 | Step 9090 | Loss: 0.3601 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:17] Epoch 1 | Step 9100 | Loss: 0.3601 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:24] Epoch 1 | Step 9110 | Loss: 0.3600 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:31] Epoch 1 | Step 9120 | Loss: 0.3600 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:38] Epoch 1 | Step 9130 | Loss: 0.3600 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:45] Epoch 1 | Step 9140 | Loss: 0.3600 | LM: 0.3511 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:09:52] Epoch 1 | Step 9150 | Loss: 0.3600 | LM: 0.3510 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:00] Epoch 1 | Step 9160 | Loss: 0.3599 | LM: 0.3510 | LB: 1.0687 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:07] Epoch 1 | Step 9170 | Loss: 0.3598 | LM: 0.3509 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:14] Epoch 1 | Step 9180 | Loss: 0.3598 | LM: 0.3509 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:21] Epoch 1 | Step 9190 | Loss: 0.3597 | LM: 0.3508 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:28] Epoch 1 | Step 9200 | Loss: 0.3597 | LM: 0.3507 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:35] Epoch 1 | Step 9210 | Loss: 0.3597 | LM: 0.3507 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:42] Epoch 1 | Step 9220 | Loss: 0.3597 | LM: 0.3506 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:49] Epoch 1 | Step 9230 | Loss: 0.3597 | LM: 0.3506 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:10:56] Epoch 1 | Step 9240 | Loss: 0.3596 | LM: 0.3505 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:03] Epoch 1 | Step 9250 | Loss: 0.3596 | LM: 0.3504 | LB: 1.0686 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:10] Epoch 1 | Step 9260 | Loss: 0.3595 | LM: 0.3505 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:18] Epoch 1 | Step 9270 | Loss: 0.3595 | LM: 0.3504 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:25] Epoch 1 | Step 9280 | Loss: 0.3595 | LM: 0.3504 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:32] Epoch 1 | Step 9290 | Loss: 0.3595 | LM: 0.3504 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:39] Epoch 1 | Step 9300 | Loss: 0.3594 | LM: 0.3503 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:46] Epoch 1 | Step 9310 | Loss: 0.3594 | LM: 0.3503 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:11:53] Epoch 1 | Step 9320 | Loss: 0.3594 | LM: 0.3503 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:00] Epoch 1 | Step 9330 | Loss: 0.3594 | LM: 0.3503 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:07] Epoch 1 | Step 9340 | Loss: 0.3593 | LM: 0.3502 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:14] Epoch 1 | Step 9350 | Loss: 0.3593 | LM: 0.3502 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.376/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:21] Epoch 1 | Step 9360 | Loss: 0.3592 | LM: 0.3501 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:29] Epoch 1 | Step 9370 | Loss: 0.3592 | LM: 0.3500 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:36] Epoch 1 | Step 9380 | Loss: 0.3592 | LM: 0.3500 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:43] Epoch 1 | Step 9390 | Loss: 0.3591 | LM: 0.3499 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:50] Epoch 1 | Step 9400 | Loss: 0.3591 | LM: 0.3500 | LB: 1.0685 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:12:57] Epoch 1 | Step 9410 | Loss: 0.3591 | LM: 0.3500 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:04] Epoch 1 | Step 9420 | Loss: 0.3592 | LM: 0.3500 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:11] Epoch 1 | Step 9430 | Loss: 0.3592 | LM: 0.3500 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:18] Epoch 1 | Step 9440 | Loss: 0.3591 | LM: 0.3499 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:25] Epoch 1 | Step 9450 | Loss: 0.3591 | LM: 0.3499 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:32] Epoch 1 | Step 9460 | Loss: 0.3591 | LM: 0.3499 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:39] Epoch 1 | Step 9470 | Loss: 0.3590 | LM: 0.3498 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:46] Epoch 1 | Step 9480 | Loss: 0.3589 | LM: 0.3497 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:13:53] Epoch 1 | Step 9490 | Loss: 0.3589 | LM: 0.3497 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:00] Epoch 1 | Step 9500 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:07] Epoch 1 | Step 9510 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:14] Epoch 1 | Step 9520 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:22] Epoch 1 | Step 9530 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0684 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:29] Epoch 1 | Step 9540 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:36] Epoch 1 | Step 9550 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:43] Epoch 1 | Step 9560 | Loss: 0.3589 | LM: 0.3498 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:50] Epoch 1 | Step 9570 | Loss: 0.3589 | LM: 0.3497 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:14:57] Epoch 1 | Step 9580 | Loss: 0.3588 | LM: 0.3497 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:04] Epoch 1 | Step 9590 | Loss: 0.3588 | LM: 0.3497 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:11] Epoch 1 | Step 9600 | Loss: 0.3588 | LM: 0.3497 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:18] Epoch 1 | Step 9610 | Loss: 0.3587 | LM: 0.3496 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:25] Epoch 1 | Step 9620 | Loss: 0.3587 | LM: 0.3495 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:32] Epoch 1 | Step 9630 | Loss: 0.3588 | LM: 0.3495 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:39] Epoch 1 | Step 9640 | Loss: 0.3587 | LM: 0.3494 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:46] Epoch 1 | Step 9650 | Loss: 0.3587 | LM: 0.3494 | LB: 1.0683 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:15:53] Epoch 1 | Step 9660 | Loss: 0.3587 | LM: 0.3493 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:01] Epoch 1 | Step 9670 | Loss: 0.3587 | LM: 0.3493 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:08] Epoch 1 | Step 9680 | Loss: 0.3586 | LM: 0.3493 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:15] Epoch 1 | Step 9690 | Loss: 0.3586 | LM: 0.3492 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:22] Epoch 1 | Step 9700 | Loss: 0.3586 | LM: 0.3492 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:29] Epoch 1 | Step 9710 | Loss: 0.3586 | LM: 0.3491 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:36] Epoch 1 | Step 9720 | Loss: 0.3585 | LM: 0.3491 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:43] Epoch 1 | Step 9730 | Loss: 0.3585 | LM: 0.3491 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:50] Epoch 1 | Step 9740 | Loss: 0.3585 | LM: 0.3491 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:16:57] Epoch 1 | Step 9750 | Loss: 0.3585 | LM: 0.3490 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:04] Epoch 1 | Step 9760 | Loss: 0.3584 | LM: 0.3490 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:11] Epoch 1 | Step 9770 | Loss: 0.3584 | LM: 0.3489 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:18] Epoch 1 | Step 9780 | Loss: 0.3584 | LM: 0.3489 | LB: 1.0682 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:25] Epoch 1 | Step 9790 | Loss: 0.3584 | LM: 0.3489 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:32] Epoch 1 | Step 9800 | Loss: 0.3583 | LM: 0.3489 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:39] Epoch 1 | Step 9810 | Loss: 0.3583 | LM: 0.3489 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:46] Epoch 1 | Step 9820 | Loss: 0.3583 | LM: 0.3488 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:17:54] Epoch 1 | Step 9830 | Loss: 0.3582 | LM: 0.3488 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:01] Epoch 1 | Step 9840 | Loss: 0.3583 | LM: 0.3487 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:08] Epoch 1 | Step 9850 | Loss: 0.3582 | LM: 0.3487 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:15] Epoch 1 | Step 9860 | Loss: 0.3582 | LM: 0.3488 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:22] Epoch 1 | Step 9870 | Loss: 0.3582 | LM: 0.3488 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:29] Epoch 1 | Step 9880 | Loss: 0.3582 | LM: 0.3488 | LB: 1.0681 | CL0: 2.9 | CL1: 2.7 | HR0: 0.351/SR0: 0.337 | HR1: 0.375/SR1: 0.368 | LR: 5.00e-05 +[2026-04-26 02:18:35] Epoch 1 completed in 7083.67s | Loss: 0.3582 | CL0: 2.9 | CL1: 2.7 +[2026-04-26 02:18:35] +Training completed! +[2026-04-26 02:18:37] Final model: /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4/model_final.pt diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/requirements.txt b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f040f697230340f8a88a6e7387f7e8983d11b547 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/requirements.txt @@ -0,0 +1,245 @@ +setuptools==78.1.1 +wheel==0.45.1 +pip==25.2 +webencodings==0.5.1 +triton==3.2.0 +pytz==2025.2 +pydub==0.25.1 +pure_eval==0.2.3 +ptyprocess==0.7.0 +nvidia-ml-py==13.590.48 +nvidia-cusparselt-cu12==0.6.2 +mpmath==1.3.0 +ipython-genutils==0.2.0 +fastjsonschema==2.21.2 +brotli==1.2.0 +antlr4-python3-runtime==4.9.3 +xxhash==3.6.0 +widgetsnbextension==4.0.14 +websocket-client==1.9.0 +webcolors==24.11.1 +wcwidth==0.2.14 +urllib3==2.5.0 +uri-template==1.3.0 +tzdata==2025.2 +typing_extensions==4.15.0 +types-python-dateutil==2.9.0.20251008 +traitlets==5.14.3 +tqdm==4.67.1 +tornado==6.5.2 +tomlkit==0.13.3 +tinycss2==1.4.0 +tabulate==0.9.0 +sympy==1.13.1 +soupsieve==2.8 +sniffio==1.3.1 +smmap==5.0.2 +six==1.17.0 +shellingham==1.5.4 +Send2Trash==1.8.3 +semantic-version==2.10.0 +safetensors==0.6.2 +rpds-py==0.27.1 +rfc3986-validator==0.1.1 +regex==2025.9.18 +pyzmq==27.1.0 +PyYAML==6.0.3 +python-multipart==0.0.22 +python-json-logger==4.0.0 +python-dotenv==1.2.1 +pyparsing==3.2.5 +PyJWT==2.8.0 +Pygments==2.19.2 +pycparser==2.23 +pyarrow==22.0.0 +psutil==7.1.0 +protobuf==6.33.4 +propcache==0.4.1 +prometheus_client==0.23.1 +portalocker==3.2.0 +platformdirs==4.5.0 +pillow==11.3.0 +pexpect==4.9.0 +pathspec==1.0.4 +parso==0.8.5 +pandocfilters==1.5.1 +packaging==25.0 +orjson==3.11.6 +opt_einsum==3.4.0 +nvidia-nvtx-cu12==12.4.127 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nccl-cu12==2.21.5 +nvidia-curand-cu12==10.3.5.147 +nvidia-cufile-cu12==1.13.1.3 +nvidia-cufft-cu12==11.2.1.3 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cublas-cu12==12.4.5.8 +numpy==2.3.3 +ninja==1.13.0 +networkx==3.5 +nest-asyncio==1.6.0 +narwhals==2.15.0 +mypy_extensions==1.1.0 +multidict==6.7.0 +mistune==3.1.4 +mdurl==0.1.2 +MarkupSafe==3.0.3 +lxml==6.0.2 +librt==0.8.0 +lark==1.3.0 +kiwisolver==1.4.9 +jupyterlab_widgets==3.0.15 +jupyterlab_pygments==0.3.0 +jsonpointer==3.0.0 +json5==0.12.1 +itsdangerous==2.2.0 +idna==3.10 +hf-xet==1.1.10 +h11==0.16.0 +groovy==0.1.2 +fsspec==2025.9.0 +frozenlist==1.8.0 +fqdn==1.5.1 +fonttools==4.60.1 +filelock==3.19.1 +ffmpy==1.0.0 +executing==2.2.1 +einops==0.8.1 +dill==0.4.0 +defusedxml==0.7.1 +decorator==5.2.1 +debugpy==1.8.17 +dacite==1.9.2 +cycler==0.12.1 +comm==0.2.3 +colorama==0.4.6 +click==8.3.1 +charset-normalizer==3.4.3 +certifi==2025.10.5 +bleach==6.2.0 +babel==2.17.0 +attrs==25.4.0 +async-lru==2.0.5 +asttokens==3.0.0 +annotated-types==0.7.0 +annotated-doc==0.0.4 +aiohappyeyeballs==2.6.1 +aiofiles==24.1.0 +yarl==1.22.0 +uvicorn==0.40.0 +typing-inspection==0.4.2 +terminado==0.18.1 +stack-data==0.6.3 +sentry-sdk==2.50.0 +scipy==1.17.0 +sacrebleu==2.6.0 +rfc3987-syntax==1.1.0 +rfc3339-validator==0.1.4 +requests==2.32.5 +reportlab==4.4.9 +referencing==0.36.2 +python-dateutil==2.9.0.post0 +pydantic_core==2.41.5 +prompt_toolkit==3.0.52 +plotly==6.5.2 +pathlib2==2.3.7.post1 +orderedmultidict==1.0.2 +optree==0.17.0 +omegaconf==2.3.0 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +mypy==1.19.1 +multiprocess==0.70.16 +matplotlib-inline==0.1.7 +markdown-it-py==4.0.0 +jupyter_core==5.8.1 +Jinja2==3.1.6 +jedi==0.19.2 +ipython_pygments_lexers==1.1.1 +httpcore==1.0.9 +gitdb==4.0.12 +ftfy==6.3.1 +contourpy==1.3.3 +cffi==2.0.0 +beautifulsoup4==4.14.2 +anyio==4.11.0 +aiosignal==1.4.0 +starlette==0.50.0 +rich==14.2.0 +pydantic==2.12.5 +pandas==2.3.3 +nvidia-cusolver-cu12==11.6.1.9 +matplotlib==3.10.7 +jupyter_server_terminals==0.5.3 +jupyter_client==8.6.3 +jsonschema-specifications==2025.9.1 +ipython==9.6.0 +hydra-core==1.3.2 +huggingface-hub==0.35.3 +httpx==0.28.1 +GitPython==3.1.46 +furl==2.1.4 +cryptography==46.0.4 +arrow==1.3.0 +argon2-cffi-bindings==25.1.0 +aiohttp==3.13.1 +wandb==0.24.0 +typer==0.21.1 +torch==2.6.0 +tokenizers==0.22.1 +seaborn==0.13.2 +safehttpx==0.1.7 +jsonschema==4.25.1 +joypy==0.2.6 +isoduration==20.11.0 +ipywidgets==8.1.7 +ipykernel==6.30.1 +gradio_client==2.0.3 +fastapi==0.128.0 +Authlib==1.6.6 +argon2-cffi==25.1.0 +transformers==4.57.6 +nbformat==5.10.4 +mlstm_kernels==2.0.2 +jupyter-console==6.6.3 +gradio==6.5.1 +datasets==4.3.0 +clearml==1.16.4 +accelerate==1.10.1 +xlstm==2.0.4 +nbclient==0.10.2 +jupyter-events==0.12.0 +trackio==0.15.0 +nbconvert==7.16.6 +jupyter_server==2.17.0 +notebook_shim==0.2.4 +jupyterlab_server==2.27.3 +jupyter-lsp==2.3.0 +nbclassic==1.3.3 +jupyterlab==4.4.9 +notebook==7.4.7 +jupyter_contrib_core==0.4.2 +jupyter==1.1.1 +jupyter_nbextensions_configurator==0.6.4 +causal-conv1d==1.5.0.post8 +flash_attn==2.7.4.post1 +mamba-ssm==2.2.4 +hnet==0.0.1 +autocommand==2.2.2 +backports.tarfile==1.2.0 +importlib_metadata==8.0.0 +inflect==7.3.1 +jaraco.collections==5.1.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +more-itertools==10.3.0 +packaging==24.2 +platformdirs==4.2.2 +tomli==2.0.1 +typeguard==4.3.0 +typing_extensions==4.12.2 +wheel==0.45.1 +zipp==3.19.2 diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-metadata.json b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..18956954518ae52fea15524eee1ab012d9f60322 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-metadata.json @@ -0,0 +1,69 @@ +{ + "os": "Linux-5.4.0-176-generic-x86_64-with-glibc2.35", + "python": "CPython 3.12.0", + "startedAt": "2026-04-26T00:19:41.197293Z", + "args": [ + "tracking=wandb", + "tracking.project=code-completion_lr-sweep", + "tracking.run_name=hnet_xl_code_lr_5e-4", + "training.lr=5e-4", + "paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4", + "data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full" + ], + "program": "/workspace/byte-llms-code/code_completion_exp/train_hnet/train.py", + "codePath": "code_completion_exp/train_hnet/train.py", + "codePathLocal": "train.py", + "git": { + "remote": "https://github.com/naryst/byte-llms-code.git", + "commit": "0a7180b6ab9f63d2794494f09ec4918576d10fa2" + }, + "email": "nikita@local.ru", + "root": "/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4", + "host": "7504e518d24a", + "executable": "/venv/bytellm/bin/python", + "cpu_count": 64, + "cpu_count_logical": 128, + "gpu": "NVIDIA H100 80GB HBM3", + "gpu_count": 4, + "disk": { + "/": { + "total": "265214230528", + "used": "170465628160" + } + }, + "memory": { + "total": "1081679683584" + }, + "gpu_nvidia": [ + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-b60cdcab-2033-2009-41de-be646c953a20" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-9982b420-4520-4238-c378-ec5a46015474" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f" + }, + { + "name": "NVIDIA H100 80GB HBM3", + "memoryTotal": "85520809984", + "cudaCores": 16896, + "architecture": "Hopper", + "uuid": "GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134" + } + ], + "cudaVersion": "12.2", + "writerId": "dyau2sb185lrum72o3jj1efzj126ey29" +} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-summary.json b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..845447a493dec18fe19ecc6f5a0562918c002c6e --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/files/wandb-summary.json @@ -0,0 +1 @@ +{"train/step_time":0.7018253087997437,"best/val_perplexity":1.3968818580448776,"_runtime":7135,"train/soft_boundary_ratio_stage0":0.3369161636344325,"epoch/hard_boundary_ratio_stage0":0.35071097942532414,"_timestamp":1.7771699152059996e+09,"train/loss_avg":0.35818642927241806,"epoch/chunk_len_stage1":2.6923843128064373,"train/lb_loss":1.0790050029754639,"best/val_loss":0.34870333082619165,"train/loss":0.24225624278187752,"epoch/lm_loss":0.3488054759618164,"val/lm_loss":0.33815049202669234,"epoch/soft_boundary_ratio_stage0":0.3369219771080528,"epoch/loss":0.3581828390180251,"train/hard_boundary_ratio_stage0":0.3507020525501328,"val/time":4.524768352508545,"train/lm_loss":0.2673550844192505,"best/step":8000,"train/chunk_len_stage1":2.692232057831329,"val/loss":0.34870333082619165,"epoch/soft_boundary_ratio_stage1":0.36771885260966747,"train/hard_boundary_ratio_stage1":0.3750683158667268,"epoch/lb_loss":1.0680555066867534,"epoch/chunk_len_stage0":2.873913584180546,"train/lr":5e-05,"train/epoch":1,"val/perplexity":1.3968818580448776,"val/lb_loss":1.0552839296204704,"_wandb":{"runtime":7135},"_step":9880,"train/chunk_len_stage0":2.8739849724727216,"epoch/time":7083.666541099548,"train/soft_boundary_ratio_stage1":0.3677366381526144,"epoch/hard_boundary_ratio_stage1":0.3750465785576219} \ No newline at end of file diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-core.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..c3cd339db34a15c0d7bca1ec509593acf7b7a206 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-core.log @@ -0,0 +1,16 @@ +{"time":"2026-04-26T00:19:41.280606372Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpj_w4xo0a/port-257611.txt","pid":257611,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-04-26T00:19:41.281310681Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":257611} +{"time":"2026-04-26T00:19:41.281295344Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-257611-257664-4135181244/socket","Net":"unix"}} +{"time":"2026-04-26T00:19:41.468587343Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-04-26T00:19:41.489622086Z","level":"INFO","msg":"handleInformInit: received","streamId":"i6lt8av0","id":"1(@)"} +{"time":"2026-04-26T00:19:41.862752556Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"i6lt8av0","id":"1(@)"} +{"time":"2026-04-26T02:18:39.038091992Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"i6lt8av0","id":"1(@)"} +{"time":"2026-04-26T02:18:39.041417993Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"i6lt8av0","id":"1(@)"} +{"time":"2026-04-26T02:18:39.05126405Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-04-26T02:18:39.051282767Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-04-26T02:18:39.051287843Z","level":"INFO","msg":"server is shutting down"} +{"time":"2026-04-26T02:18:39.051291659Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-04-26T02:18:39.051337316Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-04-26T02:18:39.051357455Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-04-26T02:18:39.051349453Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-257611-257664-4135181244/socket","Net":"unix"}} +{"time":"2026-04-26T02:18:39.051367174Z","level":"INFO","msg":"server is closed"} diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-internal.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..01df25355a4e1432719caaedc9d4e8f68da663d8 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2026-04-26T00:19:41.489729754Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-26T00:19:41.862602753Z","level":"INFO","msg":"stream: created new stream","id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862649982Z","level":"INFO","msg":"handler: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862747075Z","level":"INFO","msg":"stream: started","id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862757167Z","level":"INFO","msg":"writer: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.862759475Z","level":"INFO","msg":"sender: started","stream_id":"i6lt8av0"} +{"time":"2026-04-26T00:19:41.985346468Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-26T02:18:38.932345179Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-26T02:18:39.035750391Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-26T02:18:39.038119733Z","level":"INFO","msg":"stream: closing","id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038132189Z","level":"INFO","msg":"handler: closed","stream_id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038193257Z","level":"INFO","msg":"sender: closed","stream_id":"i6lt8av0"} +{"time":"2026-04-26T02:18:39.038198898Z","level":"INFO","msg":"stream: closed","id":"i6lt8av0"} diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug.log b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..66c2ad2a57475850ee0c0c3117eb7cd7cd4b26e7 --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug.log @@ -0,0 +1,24 @@ +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Configure stats pid to 257611 +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug.log +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/logs/debug-internal.log +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:init():844] calling init triggers +2026-04-26 00:19:41,198 INFO MainThread:257611 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'config_path': '/workspace/byte-llms-code/hnet_project/configs/hnet_2stage_XL_code.json', 'checkpoint_path': '/workspace/byte-llms-code/hnet_project/checkpoints/hnet_2stage_XL_code.pt'}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 24, 'gradient_accumulation_steps': 4, 'lr': 0.0005, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'lr_multiplier': [2.0, 1.5, 1.0], 'load_balancing_weight': 0.01, 'load_balancing_N': 4.0, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None, 'warmup_model': True}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 0, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'hnet_xl_code_lr_5e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/hnet_xl_code_lr_5e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_hnet/train.py'}} +2026-04-26 00:19:41,199 INFO MainThread:257611 [wandb_init.py:init():892] starting backend +2026-04-26 00:19:41,468 INFO MainThread:257611 [wandb_init.py:init():895] sending inform_init request +2026-04-26 00:19:41,488 INFO MainThread:257611 [wandb_init.py:init():903] backend started and connected +2026-04-26 00:19:41,491 INFO MainThread:257611 [wandb_init.py:init():973] updated telemetry +2026-04-26 00:19:41,508 INFO MainThread:257611 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-26 00:19:41,984 INFO MainThread:257611 [wandb_init.py:init():1044] starting run threads in backend +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_console_start():2529] atexit reg +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-26 00:19:42,142 INFO MainThread:257611 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-26 00:19:42,145 INFO MainThread:257611 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-26 02:18:37,753 INFO MainThread:257611 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/i6lt8av0 +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_restore():2476] restore +2026-04-26 02:18:37,754 INFO MainThread:257611 [wandb_run.py:_restore():2482] restore done +2026-04-26 02:18:39,037 INFO MainThread:257611 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/run-i6lt8av0.wandb b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/run-i6lt8av0.wandb new file mode 100644 index 0000000000000000000000000000000000000000..5b3be3f0baa6ad0c15a5697d719d5ee8a9711d3f --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-4/wandb/run-20260426_001941-i6lt8av0/run-i6lt8av0.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4904ef01e885208188a41b5d7f41a4a1759d924be32288783eb848b49f89430b +size 3065241 diff --git a/lr_sweep/hnet_xl_code_lr_5e-5/model_best.pt b/lr_sweep/hnet_xl_code_lr_5e-5/model_best.pt new file mode 100644 index 0000000000000000000000000000000000000000..28c417e4c6b80c147e354fd532bafe51209cb5cb --- /dev/null +++ b/lr_sweep/hnet_xl_code_lr_5e-5/model_best.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:310965a6147f6c3900fdda4b4054876d929624a57099569b4c715c336af405ec +size 3315165139 diff --git a/lr_sweep/pythia_1b_lr_1e-4/.hydra/config.yaml b/lr_sweep/pythia_1b_lr_1e-4/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6b69c50542bfb5ef5852c9b45b748740d6f95d59 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/.hydra/config.yaml @@ -0,0 +1,49 @@ +model: + name: EleutherAI/pythia-1b + checkpoint_path: null + from_scratch: false +training: + epochs: 1 + batch_size: 4 + eval_batch_size: 12 + gradient_accumulation_steps: 4 + lr: 0.0001 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null +data: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 4 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: pythia_1b_lr_1e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: ${paths.output_dir} +paths: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 +seed: 42 +device: cuda diff --git a/lr_sweep/pythia_1b_lr_1e-4/.hydra/hydra.yaml b/lr_sweep/pythia_1b_lr_1e-4/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..75e84f65f07bff4ab60a5449ac36203d8f6b0b26 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/.hydra/hydra.yaml @@ -0,0 +1,167 @@ +hydra: + run: + dir: ${paths.output_dir} + sweep: + dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=pythia_1b_lr_1e-4 + - training.lr=1e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 + - model=pythia_1b + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + job: + name: train + chdir: false + override_dirname: data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full,model=pythia_1b,paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4,tracking.project=code-completion_lr-sweep,tracking.run_name=pythia_1b_lr_1e-4,tracking=wandb,training.lr=1e-4 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /workspace/byte-llms-code/code_completion_exp/train_pythia + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /workspace/byte-llms-code/code_completion_exp/train_pythia/configs + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 + choices: + paths: default + tracking: wandb + logging: default + data: default + training: default + model: pythia_1b + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/lr_sweep/pythia_1b_lr_1e-4/.hydra/overrides.yaml b/lr_sweep/pythia_1b_lr_1e-4/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b4cebf4738251eda1a4dd0bfb7842fc5d41a5b35 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/.hydra/overrides.yaml @@ -0,0 +1,7 @@ +- tracking=wandb +- tracking.project=code-completion_lr-sweep +- tracking.run_name=pythia_1b_lr_1e-4 +- training.lr=1e-4 +- paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 +- model=pythia_1b +- data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/debug-cli.root.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/debug-cli.root.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/debug-internal.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cfcffa249b5b679b1a2090f4f5cb51ce0da411e6 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2026-04-25T20:13:33.512352077Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T20:13:34.125183027Z","level":"INFO","msg":"stream: created new stream","id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125255934Z","level":"INFO","msg":"handler: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125364107Z","level":"INFO","msg":"stream: started","id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125374614Z","level":"INFO","msg":"writer: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125380451Z","level":"INFO","msg":"sender: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.291689491Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-25T20:21:29.531100906Z","level":"ERROR","msg":"api: HTTP error","status":403,"method":"POST","url":"https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/p8ozhgpm/file_stream"} +{"time":"2026-04-25T20:21:29.531179064Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 403 Forbidden url=https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/p8ozhgpm/file_stream: "} +{"time":"2026-04-25T20:56:12.137741653Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-25T20:56:12.139056718Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-25T20:56:12.142590311Z","level":"INFO","msg":"stream: closing","id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142600734Z","level":"INFO","msg":"handler: closed","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142693536Z","level":"INFO","msg":"sender: closed","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142707407Z","level":"INFO","msg":"stream: closed","id":"p8ozhgpm"} diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/debug.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..64e5cdb474041b621347916c4adba93911b07bd6 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/debug.log @@ -0,0 +1,24 @@ +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Configure stats pid to 129801 +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug.log +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-internal.log +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():844] calling init triggers +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 0.0001, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'pythia_1b_lr_1e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}} +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():892] starting backend +2026-04-25 20:13:33,489 INFO MainThread:129801 [wandb_init.py:init():895] sending inform_init request +2026-04-25 20:13:33,511 INFO MainThread:129801 [wandb_init.py:init():903] backend started and connected +2026-04-25 20:13:33,514 INFO MainThread:129801 [wandb_init.py:init():973] updated telemetry +2026-04-25 20:13:33,531 INFO MainThread:129801 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 20:13:34,288 INFO MainThread:129801 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 20:13:34,453 INFO MainThread:129801 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-25 20:56:10,222 INFO MainThread:129801 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/p8ozhgpm +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_restore():2476] restore +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_restore():2482] restore done +2026-04-25 20:56:12,142 INFO MainThread:129801 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/code/code_completion_exp/train_pythia/train.py b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/code/code_completion_exp/train_pythia/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a4739962b19b1d61085c8b55220470866db8aea1 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/code/code_completion_exp/train_pythia/train.py @@ -0,0 +1,606 @@ +""" +Training Pipeline для Pythia (decoder-only transformer) на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=pythia_160m + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +import time +from pathlib import Path + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from datasets import load_from_disk + +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + AutoConfig, + PreTrainedTokenizerBase, +) +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import AverageMeter, log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, log_metrics, finish_tracking +from training_lib.validation import run_validation + + +# ============================================================================ +# ДАННЫЕ +# ============================================================================ + + +class CodeCompletionCollator: + """Collate function для батчирования примеров code completion.""" + + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + max_context_len: int = 1024, + max_target_len: int = 256, + ): + self.tokenizer = tokenizer + self.max_context_len = max_context_len + self.max_target_len = max_target_len + self.pad_token_id = tokenizer.pad_token_id + + def __call__(self, batch: list[dict]) -> dict: + contexts = [item["context"] for item in batch] + targets = [item["target"] for item in batch] + + encoded_contexts = self.tokenizer( + contexts, + add_special_tokens=True, + truncation=True, + max_length=self.max_context_len, + return_tensors=None, + ) + encoded_targets = self.tokenizer( + targets, + add_special_tokens=False, + truncation=True, + max_length=self.max_target_len, + return_tensors=None, + ) + + input_ids_list = [] + context_lengths = [] + + for ctx_ids, tgt_ids in zip( + encoded_contexts["input_ids"], encoded_targets["input_ids"] + ): + tgt_ids = tgt_ids + [self.tokenizer.eos_token_id] + context_lengths.append(len(ctx_ids)) + input_ids_list.append(ctx_ids + tgt_ids) + + max_len = max(len(ids) for ids in input_ids_list) + + padded_input_ids = [] + attention_mask = [] + + for ids in input_ids_list: + padding_len = max_len - len(ids) + padded_input_ids.append(ids + [self.pad_token_id] * padding_len) + attention_mask.append([1] * len(ids) + [0] * padding_len) + + return { + "input_ids": torch.tensor(padded_input_ids, dtype=torch.long), + "attention_mask": torch.tensor(attention_mask, dtype=torch.long), + "context_lengths": torch.tensor(context_lengths, dtype=torch.long), + } + + +def create_dataloaders( + cfg: DictConfig, tokenizer: PreTrainedTokenizerBase +) -> dict[str, DataLoader]: + """Создание DataLoader'ов для train и validation.""" + dataset_dict = load_from_disk(cfg.data.path) + + collator = CodeCompletionCollator( + tokenizer=tokenizer, + max_context_len=cfg.data.max_context_len, + max_target_len=cfg.data.max_target_len, + ) + + dataloaders = {} + + if "train" in dataset_dict: + train_dataset = dataset_dict["train"] + max_train = cfg.data.get("max_train_samples", None) + if max_train is not None: + train_dataset = train_dataset.select(range(min(max_train, len(train_dataset)))) + dataloaders["train"] = DataLoader( + train_dataset, + batch_size=cfg.training.batch_size, + shuffle=True, + collate_fn=collator, + num_workers=cfg.data.num_workers, + pin_memory=cfg.data.pin_memory, + ) + + if "validation" in dataset_dict: + val_dataset = dataset_dict["validation"] + max_val = cfg.data.get("max_val_samples", None) + if max_val is not None: + val_dataset = val_dataset.select(range(min(max_val, len(val_dataset)))) + eval_batch_size = cfg.training.get("eval_batch_size", cfg.training.batch_size) + dataloaders["validation"] = DataLoader( + val_dataset, + batch_size=eval_batch_size, + shuffle=False, + collate_fn=collator, + num_workers=cfg.data.num_workers, + pin_memory=cfg.data.pin_memory, + ) + + return dataloaders + + + + +# ============================================================================ +# LOSS ФУНКЦИИ +# ============================================================================ + + +def compute_loss( + logits: torch.Tensor, + input_ids: torch.Tensor, + context_lengths: torch.Tensor, + attention_mask: torch.Tensor, +) -> dict: + """Вычисление loss для авторегрессионной модели.""" + batch_size, seq_len, vocab_size = logits.shape + + shift_logits = logits[:, :-1, :].contiguous() + shift_labels = input_ids[:, 1:].contiguous() + shift_mask = attention_mask[:, 1:].contiguous() + + target_mask = torch.zeros_like(shift_labels, dtype=torch.bool) + for i in range(batch_size): + ctx_len = context_lengths[i].item() + target_mask[i, ctx_len - 1 :] = True + + final_mask = target_mask & shift_mask.bool() + + if final_mask.sum() > 0: + loss = F.cross_entropy( + shift_logits[final_mask], shift_labels[final_mask], reduction="mean" + ) + else: + loss = torch.tensor(0.0, device=logits.device) + + return {"loss": loss} + + +def _pythia_forward_loss( + model: nn.Module, + batch: dict, + cfg: DictConfig, + accelerator: Accelerator, +) -> dict: + """Forward + loss for a plain HF causal LM (attention_mask= kwarg, .logits).""" + input_ids = batch["input_ids"] + attention_mask = batch["attention_mask"] + context_lengths = batch["context_lengths"] + output = model(input_ids, attention_mask=attention_mask) + return compute_loss(output.logits, input_ids, context_lengths, attention_mask) + + +# ============================================================================ +# PARAMETER GROUPING +# ============================================================================ + + +def group_params(model: nn.Module, weight_decay: float) -> list[dict]: + """Группировка параметров для optimizer.""" + decay_params = [] + no_decay_params = [] + + for name, param in model.named_parameters(): + if not param.requires_grad: + continue + + if "bias" in name or "LayerNorm" in name or "layernorm" in name: + no_decay_params.append(param) + else: + decay_params.append(param) + + return [ + {"params": decay_params, "weight_decay": weight_decay}, + {"params": no_decay_params, "weight_decay": 0.0}, + ] + + + + +# ============================================================================ +# TRAINING LOOP +# ============================================================================ + + +def train_epoch( + model: nn.Module, + dataloader: DataLoader, + optimizer: torch.optim.Optimizer, + scheduler, + cfg: DictConfig, + epoch: int, + global_step: int, + accelerator: Accelerator, + val_dataloader: DataLoader | None = None, + best_val_loss: float = float("inf"), +) -> tuple[int, float]: + """Один epoch тренировки. Возвращает (global_step, best_val_loss).""" + model.train() + + loss_meter = AverageMeter() + + optimizer.zero_grad() + accumulated_loss = 0.0 + accumulated_steps = 0 + + epoch_start_time = time.time() + step_start_time = time.time() + + for batch_idx, batch in enumerate(dataloader): + input_ids = batch["input_ids"] + attention_mask = batch["attention_mask"] + context_lengths = batch["context_lengths"] + + with accelerator.autocast(): + output = model(input_ids, attention_mask=attention_mask) + logits = output.logits + loss_dict = compute_loss( + logits, input_ids, context_lengths, attention_mask + ) + + loss = loss_dict["loss"] / cfg.training.gradient_accumulation_steps + accelerator.backward(loss) + + accumulated_loss += loss_dict["loss"].item() + accumulated_steps += 1 + + if accumulated_steps == cfg.training.gradient_accumulation_steps: + if cfg.training.max_grad_norm > 0: + accelerator.clip_grad_norm_( + model.parameters(), cfg.training.max_grad_norm + ) + + optimizer.step() + scheduler.step() + optimizer.zero_grad() + + avg_loss = accumulated_loss / cfg.training.gradient_accumulation_steps + loss_meter.update(avg_loss) + + global_step += 1 + + if global_step % cfg.logging.log_interval == 0: + step_time = time.time() - step_start_time + current_lr = scheduler.get_last_lr()[0] + + metrics = { + "train/loss": loss_meter.val, + "train/loss_avg": loss_meter.avg, + "train/lr": current_lr, + "train/epoch": epoch, + "train/step_time": step_time / cfg.logging.log_interval, + } + + log_metrics(metrics, step=global_step) + + log_message( + f"Epoch {epoch} | Step {global_step} | " + f"Loss: {loss_meter.avg:.4f} | " + f"LR: {current_lr:.2e}", + cfg, + accelerator, + ) + + step_start_time = time.time() + + if ( + cfg.logging.save_interval > 0 + and global_step % cfg.logging.save_interval == 0 + ): + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + eval_interval = cfg.logging.get("eval_interval", 0) + if ( + eval_interval > 0 + and val_dataloader is not None + and global_step % eval_interval == 0 + ): + val_metrics = run_validation( + model=model, + dataloader=val_dataloader, + cfg=cfg, + global_step=global_step, + accelerator=accelerator, + forward_loss_fn=_pythia_forward_loss, + ) + + if val_metrics["val/loss"] < best_val_loss: + best_val_loss = val_metrics["val/loss"] + if accelerator.is_main_process: + best_model_path = Path(cfg.paths.output_dir) / "model_best.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), best_model_path) + log_message( + f"New best model saved! Val loss: {best_val_loss:.4f}", + cfg, + accelerator + ) + + log_metrics( + { + "best/val_loss": best_val_loss, + "best/val_perplexity": val_metrics["val/perplexity"], + "best/step": global_step, + }, + step=global_step, + ) + + model.train() + + accumulated_loss = 0.0 + accumulated_steps = 0 + + epoch_time = time.time() - epoch_start_time + + log_message( + f"Epoch {epoch} completed in {epoch_time:.2f}s | " + f"Loss: {loss_meter.avg:.4f}", + cfg, + accelerator, + ) + + log_metrics({ + "epoch/loss": loss_meter.avg, + "epoch/time": epoch_time, + }) + + return global_step, best_val_loss + + +# ============================================================================ +# MAIN +# ============================================================================ + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Главная функция тренировки с поддержкой DDP через Accelerate.""" + + # === Performance: Enable TF32 for faster matmuls on Ampere+ GPUs === + torch.set_float32_matmul_precision('high') + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", cfg, accelerator) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message("Pythia Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = AutoTokenizer.from_pretrained(cfg.model.name) + + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + tokenizer.pad_token_id = tokenizer.eos_token_id + + # === Model === + log_message("Loading model...", cfg, accelerator) + + # Flash Attention 2 + torch_dtype = torch.bfloat16 if cfg.training.use_amp else torch.float32 + + if cfg.model.checkpoint_path: + model = AutoModelForCausalLM.from_pretrained( + cfg.model.name, + attn_implementation="flash_attention_2", + torch_dtype=torch_dtype, + ) + checkpoint = torch.load(cfg.model.checkpoint_path, map_location="cpu") + model.load_state_dict(checkpoint["model_state_dict"] if "model_state_dict" in checkpoint else checkpoint) + log_message(f"Loaded checkpoint: {cfg.model.checkpoint_path}", cfg, accelerator) + elif cfg.model.from_scratch: + config = AutoConfig.from_pretrained(cfg.model.name) + config._attn_implementation = "flash_attention_2" + model = AutoModelForCausalLM.from_config(config, torch_dtype=torch_dtype) + log_message(f"Initialized from scratch: {cfg.model.name}", cfg, accelerator) + else: + model = AutoModelForCausalLM.from_pretrained( + cfg.model.name, + attn_implementation="flash_attention_2", + torch_dtype=torch_dtype, + ) + log_message(f"Loaded pretrained: {cfg.model.name}", cfg, accelerator) + + model.train() + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message(f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator) + log_message(f"Train batches per epoch (before DDP split): {len(train_dataloader)}", cfg, accelerator) + + if val_dataloader: + log_message(f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model, cfg.training.weight_decay) + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil( + len(train_dataloader) / accelerator.num_processes + ) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator + ) + + # === Accelerate Prepare === + log_message("Preparing model, optimizer, and dataloaders with Accelerate...", cfg, accelerator) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message(f"Train batches per epoch (after DDP split): {len(train_dataloader)}", cfg, accelerator) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, optimizer, scheduler, cfg.training.resume_checkpoint, cfg, accelerator + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint(model, optimizer, scheduler, global_step, epoch, cfg, accelerator) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/config.yaml b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..55ec153262784719ef1f9dc3ca926b73ec0b584a --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/config.yaml @@ -0,0 +1,162 @@ +wandb_version: 1 + +_wandb: + desc: null + value: + code_path: code/code_completion_exp/train_pythia/train.py + python_version: 3.12.0 + cli_version: 0.24.0 + framework: huggingface + huggingface_version: 4.57.6 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1777148013 + t: + 1: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 2: + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + 3: + - 2 + - 13 + - 16 + - 37 + - 42 + - 61 + 4: 3.12.0 + 5: 0.24.0 + 6: 4.57.6 + 13: linux-x86_64 + e: + 4o5msocvznwsbtdeapabjlonbytfxwvk: + os: Linux-5.4.0-176-generic-x86_64-with-glibc2.35 + python: CPython 3.12.0 + started_at: '2026-04-25T20:13:33.215817Z' + args: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=pythia_1b_lr_1e-4 + - training.lr=1e-4 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 + - model=pythia_1b + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + program: /workspace/byte-llms-code/code_completion_exp/train_pythia/train.py + code_path: code_completion_exp/train_pythia/train.py + code_path_local: train.py + git: + remote_url: https://github.com/naryst/byte-llms-code.git + commit: f111e13281aa0dc58e24302edab5b0d5c2024586 + email: nikita@local.ru + root: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 + host: 7504e518d24a + executable: /venv/bytellm/bin/python + cpu_count: 64 + cpu_count_logical: 128 + gpu_type: NVIDIA H100 80GB HBM3 + gpu_count: 4 + disk: + /: + total: '265214230528' + used: '104089939968' + memory: + total: '1081679683584' + gpu_nvidia: + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-b60cdcab-2033-2009-41de-be646c953a20 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9982b420-4520-4238-c378-ec5a46015474 + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f + - name: NVIDIA H100 80GB HBM3 + memory_total: '85520809984' + cuda_cores: 16896 + architecture: Hopper + uuid: GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134 + cuda_version: '12.2' + writer_id: 4o5msocvznwsbtdeapabjlonbytfxwvk +model: + desc: null + value: + name: EleutherAI/pythia-1b + checkpoint_path: null + from_scratch: false +training: + desc: null + value: + epochs: 1 + batch_size: 4 + eval_batch_size: 12 + gradient_accumulation_steps: 4 + lr: 0.0001 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null +data: + desc: null + value: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 4 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + desc: null + value: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + desc: null + value: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: pythia_1b_lr_1e-4 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 +paths: + desc: null + value: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4 +seed: + desc: null + value: 42 +device: + desc: null + value: cuda diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/output.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..283ea9254bd19e3ea40102cf7cebe130bca9cbc8 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/output.log @@ -0,0 +1,1057 @@ +`torch_dtype` is deprecated! Use `dtype` instead! +wandb: WARNING Fatal error while uploading data. Some run data will not be synced, but it will still be written to disk. Use `wandb sync` at the end of the run to try uploading. +[2026-04-25 20:13:34] Initializing tokenizer... +[2026-04-25 20:13:34] Loading model... +[2026-04-25 20:13:37] Loaded pretrained: EleutherAI/pythia-1b +[2026-04-25 20:13:37] Total params: 1,011,781,632 +[2026-04-25 20:13:37] Trainable params: 1,011,781,632 +[2026-04-25 20:13:37] Creating dataloaders... +[2026-04-25 20:13:37] Train dataset size: 316397 +[2026-04-25 20:13:37] Train batches per epoch (before DDP split): 79100 +[2026-04-25 20:13:37] Validation dataset size: 2000 +[2026-04-25 20:13:37] Validation batches: 167 +[2026-04-25 20:13:37] Creating optimizer... +[2026-04-25 20:13:37] Total steps: 9887, Steps per epoch: 39550 +[2026-04-25 20:13:37] Preparing model, optimizer, and dataloaders with Accelerate... +[2026-04-25 20:13:39] Train batches per epoch (after DDP split): 39550 +[2026-04-25 20:13:39] Starting training... +[2026-04-25 20:13:39] +============================================================ +[2026-04-25 20:13:39] EPOCH 1/1 +[2026-04-25 20:13:39] ============================================================ +[2026-04-25 20:13:42] Epoch 1 | Step 10 | Loss: 1.5821 | LR: 1.18e-05 +[2026-04-25 20:13:45] Epoch 1 | Step 20 | Loss: 1.4030 | LR: 1.36e-05 +[2026-04-25 20:13:47] Epoch 1 | Step 30 | Loss: 1.2973 | LR: 1.55e-05 +[2026-04-25 20:13:50] Epoch 1 | Step 40 | Loss: 1.2578 | LR: 1.73e-05 +[2026-04-25 20:13:53] Epoch 1 | Step 50 | Loss: 1.2119 | LR: 1.91e-05 +[2026-04-25 20:13:56] Epoch 1 | Step 60 | Loss: 1.1840 | LR: 2.09e-05 +[2026-04-25 20:13:58] Epoch 1 | Step 70 | Loss: 1.1538 | LR: 2.28e-05 +[2026-04-25 20:14:01] Epoch 1 | Step 80 | Loss: 1.1532 | LR: 2.46e-05 +[2026-04-25 20:14:03] Epoch 1 | Step 90 | Loss: 1.1438 | LR: 2.64e-05 +[2026-04-25 20:14:06] Epoch 1 | Step 100 | Loss: 1.1344 | LR: 2.82e-05 +[2026-04-25 20:14:09] Epoch 1 | Step 110 | Loss: 1.1396 | LR: 3.00e-05 +[2026-04-25 20:14:11] Epoch 1 | Step 120 | Loss: 1.1391 | LR: 3.19e-05 +[2026-04-25 20:14:14] Epoch 1 | Step 130 | Loss: 1.1465 | LR: 3.37e-05 +[2026-04-25 20:14:16] Epoch 1 | Step 140 | Loss: 1.1553 | LR: 3.55e-05 +[2026-04-25 20:14:19] Epoch 1 | Step 150 | Loss: 1.1471 | LR: 3.73e-05 +[2026-04-25 20:14:21] Epoch 1 | Step 160 | Loss: 1.1421 | LR: 3.91e-05 +[2026-04-25 20:14:24] Epoch 1 | Step 170 | Loss: 1.1439 | LR: 4.10e-05 +[2026-04-25 20:14:26] Epoch 1 | Step 180 | Loss: 1.1374 | LR: 4.28e-05 +[2026-04-25 20:14:29] Epoch 1 | Step 190 | Loss: 1.1415 | LR: 4.46e-05 +[2026-04-25 20:14:32] Epoch 1 | Step 200 | Loss: 1.1444 | LR: 4.64e-05 +[2026-04-25 20:14:34] Epoch 1 | Step 210 | Loss: 1.1538 | LR: 4.83e-05 +[2026-04-25 20:14:37] Epoch 1 | Step 220 | Loss: 1.1587 | LR: 5.01e-05 +[2026-04-25 20:14:39] Epoch 1 | Step 230 | Loss: 1.1555 | LR: 5.19e-05 +[2026-04-25 20:14:42] Epoch 1 | Step 240 | Loss: 1.1568 | LR: 5.37e-05 +[2026-04-25 20:14:44] Epoch 1 | Step 250 | Loss: 1.1584 | LR: 5.55e-05 +[2026-04-25 20:14:47] Epoch 1 | Step 260 | Loss: 1.1665 | LR: 5.74e-05 +[2026-04-25 20:14:49] Epoch 1 | Step 270 | Loss: 1.1680 | LR: 5.92e-05 +[2026-04-25 20:14:52] Epoch 1 | Step 280 | Loss: 1.1668 | LR: 6.10e-05 +[2026-04-25 20:14:55] Epoch 1 | Step 290 | Loss: 1.1693 | LR: 6.28e-05 +[2026-04-25 20:14:57] Epoch 1 | Step 300 | Loss: 1.1729 | LR: 6.47e-05 +[2026-04-25 20:15:00] Epoch 1 | Step 310 | Loss: 1.1764 | LR: 6.65e-05 +[2026-04-25 20:15:02] Epoch 1 | Step 320 | Loss: 1.1767 | LR: 6.83e-05 +[2026-04-25 20:15:05] Epoch 1 | Step 330 | Loss: 1.1793 | LR: 7.01e-05 +[2026-04-25 20:15:08] Epoch 1 | Step 340 | Loss: 1.1832 | LR: 7.19e-05 +[2026-04-25 20:15:10] Epoch 1 | Step 350 | Loss: 1.1884 | LR: 7.38e-05 +[2026-04-25 20:15:13] Epoch 1 | Step 360 | Loss: 1.1906 | LR: 7.56e-05 +[2026-04-25 20:15:16] Epoch 1 | Step 370 | Loss: 1.1922 | LR: 7.74e-05 +[2026-04-25 20:15:18] Epoch 1 | Step 380 | Loss: 1.1955 | LR: 7.92e-05 +[2026-04-25 20:15:21] Epoch 1 | Step 390 | Loss: 1.1994 | LR: 8.11e-05 +[2026-04-25 20:15:23] Epoch 1 | Step 400 | Loss: 1.2043 | LR: 8.29e-05 +[2026-04-25 20:15:26] Epoch 1 | Step 410 | Loss: 1.2095 | LR: 8.47e-05 +[2026-04-25 20:15:29] Epoch 1 | Step 420 | Loss: 1.2152 | LR: 8.65e-05 +[2026-04-25 20:15:31] Epoch 1 | Step 430 | Loss: 1.2239 | LR: 8.83e-05 +[2026-04-25 20:15:34] Epoch 1 | Step 440 | Loss: 1.2254 | LR: 9.02e-05 +[2026-04-25 20:15:37] Epoch 1 | Step 450 | Loss: 1.2297 | LR: 9.20e-05 +[2026-04-25 20:15:39] Epoch 1 | Step 460 | Loss: 1.2354 | LR: 9.38e-05 +[2026-04-25 20:15:42] Epoch 1 | Step 470 | Loss: 1.2394 | LR: 9.56e-05 +[2026-04-25 20:15:44] Epoch 1 | Step 480 | Loss: 1.2462 | LR: 9.74e-05 +[2026-04-25 20:15:47] Epoch 1 | Step 490 | Loss: 1.2508 | LR: 9.93e-05 +[2026-04-25 20:15:49] Epoch 1 | Step 500 | Loss: 1.2557 | LR: 1.00e-04 +[2026-04-25 20:15:52] Epoch 1 | Step 510 | Loss: 1.2617 | LR: 1.00e-04 +[2026-04-25 20:15:54] Epoch 1 | Step 520 | Loss: 1.2677 | LR: 1.00e-04 +[2026-04-25 20:15:57] Epoch 1 | Step 530 | Loss: 1.2712 | LR: 1.00e-04 +[2026-04-25 20:16:00] Epoch 1 | Step 540 | Loss: 1.2743 | LR: 1.00e-04 +[2026-04-25 20:16:02] Epoch 1 | Step 550 | Loss: 1.2779 | LR: 1.00e-04 +[2026-04-25 20:16:05] Epoch 1 | Step 560 | Loss: 1.2813 | LR: 1.00e-04 +[2026-04-25 20:16:07] Epoch 1 | Step 570 | Loss: 1.2866 | LR: 1.00e-04 +[2026-04-25 20:16:10] Epoch 1 | Step 580 | Loss: 1.2923 | LR: 1.00e-04 +[2026-04-25 20:16:12] Epoch 1 | Step 590 | Loss: 1.2968 | LR: 1.00e-04 +[2026-04-25 20:16:15] Epoch 1 | Step 600 | Loss: 1.3014 | LR: 1.00e-04 +[2026-04-25 20:16:17] Epoch 1 | Step 610 | Loss: 1.3082 | LR: 1.00e-04 +[2026-04-25 20:16:20] Epoch 1 | Step 620 | Loss: 1.3145 | LR: 1.00e-04 +[2026-04-25 20:16:22] Epoch 1 | Step 630 | Loss: 1.3201 | LR: 1.00e-04 +[2026-04-25 20:16:25] Epoch 1 | Step 640 | Loss: 1.3257 | LR: 1.00e-04 +[2026-04-25 20:16:27] Epoch 1 | Step 650 | Loss: 1.3311 | LR: 1.00e-04 +[2026-04-25 20:16:30] Epoch 1 | Step 660 | Loss: 1.3362 | LR: 1.00e-04 +[2026-04-25 20:16:32] Epoch 1 | Step 670 | Loss: 1.3399 | LR: 1.00e-04 +[2026-04-25 20:16:35] Epoch 1 | Step 680 | Loss: 1.3452 | LR: 1.00e-04 +[2026-04-25 20:16:38] Epoch 1 | Step 690 | Loss: 1.3482 | LR: 1.00e-04 +[2026-04-25 20:16:40] Epoch 1 | Step 700 | Loss: 1.3537 | LR: 1.00e-04 +[2026-04-25 20:16:42] Epoch 1 | Step 710 | Loss: 1.3568 | LR: 1.00e-04 +[2026-04-25 20:16:45] Epoch 1 | Step 720 | Loss: 1.3600 | LR: 1.00e-04 +[2026-04-25 20:16:48] Epoch 1 | Step 730 | Loss: 1.3633 | LR: 1.00e-04 +[2026-04-25 20:16:50] Epoch 1 | Step 740 | Loss: 1.3661 | LR: 1.00e-04 +[2026-04-25 20:16:52] Epoch 1 | Step 750 | Loss: 1.3680 | LR: 1.00e-04 +[2026-04-25 20:16:55] Epoch 1 | Step 760 | Loss: 1.3728 | LR: 1.00e-04 +[2026-04-25 20:16:58] Epoch 1 | Step 770 | Loss: 1.3775 | LR: 1.00e-04 +[2026-04-25 20:17:00] Epoch 1 | Step 780 | Loss: 1.3813 | LR: 1.00e-04 +[2026-04-25 20:17:02] Epoch 1 | Step 790 | Loss: 1.3851 | LR: 1.00e-04 +[2026-04-25 20:17:05] Epoch 1 | Step 800 | Loss: 1.3867 | LR: 1.00e-04 +[2026-04-25 20:17:08] Epoch 1 | Step 810 | Loss: 1.3902 | LR: 1.00e-04 +[2026-04-25 20:17:10] Epoch 1 | Step 820 | Loss: 1.3927 | LR: 1.00e-04 +[2026-04-25 20:17:13] Epoch 1 | Step 830 | Loss: 1.3954 | LR: 1.00e-04 +[2026-04-25 20:17:15] Epoch 1 | Step 840 | Loss: 1.3972 | LR: 1.00e-04 +[2026-04-25 20:17:18] Epoch 1 | Step 850 | Loss: 1.3981 | LR: 1.00e-04 +[2026-04-25 20:17:20] Epoch 1 | Step 860 | Loss: 1.4020 | LR: 1.00e-04 +[2026-04-25 20:17:23] Epoch 1 | Step 870 | Loss: 1.4055 | LR: 1.00e-04 +[2026-04-25 20:17:25] Epoch 1 | Step 880 | Loss: 1.4089 | LR: 1.00e-04 +[2026-04-25 20:17:28] Epoch 1 | Step 890 | Loss: 1.4109 | LR: 1.00e-04 +[2026-04-25 20:17:30] Epoch 1 | Step 900 | Loss: 1.4119 | LR: 1.00e-04 +[2026-04-25 20:17:33] Epoch 1 | Step 910 | Loss: 1.4154 | LR: 1.00e-04 +[2026-04-25 20:17:36] Epoch 1 | Step 920 | Loss: 1.4191 | LR: 1.00e-04 +[2026-04-25 20:17:38] Epoch 1 | Step 930 | Loss: 1.4208 | LR: 1.00e-04 +[2026-04-25 20:17:41] Epoch 1 | Step 940 | Loss: 1.4231 | LR: 1.00e-04 +[2026-04-25 20:17:43] Epoch 1 | Step 950 | Loss: 1.4237 | LR: 1.00e-04 +[2026-04-25 20:17:46] Epoch 1 | Step 960 | Loss: 1.4261 | LR: 1.00e-04 +[2026-04-25 20:17:49] Epoch 1 | Step 970 | Loss: 1.4283 | LR: 1.00e-04 +[2026-04-25 20:17:51] Epoch 1 | Step 980 | Loss: 1.4295 | LR: 1.00e-04 +[2026-04-25 20:17:54] Epoch 1 | Step 990 | Loss: 1.4301 | LR: 1.00e-04 +[2026-04-25 20:17:57] Epoch 1 | Step 1000 | Loss: 1.4326 | LR: 1.00e-04 +[2026-04-25 20:17:59] Epoch 1 | Step 1010 | Loss: 1.4358 | LR: 1.00e-04 +[2026-04-25 20:18:02] Epoch 1 | Step 1020 | Loss: 1.4375 | LR: 1.00e-04 +[2026-04-25 20:18:04] Epoch 1 | Step 1030 | Loss: 1.4408 | LR: 1.00e-04 +[2026-04-25 20:18:07] Epoch 1 | Step 1040 | Loss: 1.4410 | LR: 1.00e-04 +[2026-04-25 20:18:09] Epoch 1 | Step 1050 | Loss: 1.4424 | LR: 1.00e-04 +[2026-04-25 20:18:12] Epoch 1 | Step 1060 | Loss: 1.4426 | LR: 1.00e-04 +[2026-04-25 20:18:14] Epoch 1 | Step 1070 | Loss: 1.4437 | LR: 1.00e-04 +[2026-04-25 20:18:17] Epoch 1 | Step 1080 | Loss: 1.4466 | LR: 1.00e-04 +[2026-04-25 20:18:19] Epoch 1 | Step 1090 | Loss: 1.4506 | LR: 1.00e-04 +[2026-04-25 20:18:22] Epoch 1 | Step 1100 | Loss: 1.4514 | LR: 1.00e-04 +[2026-04-25 20:18:24] Epoch 1 | Step 1110 | Loss: 1.4540 | LR: 1.00e-04 +[2026-04-25 20:18:27] Epoch 1 | Step 1120 | Loss: 1.4568 | LR: 1.00e-04 +[2026-04-25 20:18:29] Epoch 1 | Step 1130 | Loss: 1.4586 | LR: 1.00e-04 +[2026-04-25 20:18:32] Epoch 1 | Step 1140 | Loss: 1.4596 | LR: 1.00e-04 +[2026-04-25 20:18:35] Epoch 1 | Step 1150 | Loss: 1.4591 | LR: 1.00e-04 +[2026-04-25 20:18:37] Epoch 1 | Step 1160 | Loss: 1.4616 | LR: 1.00e-04 +[2026-04-25 20:18:40] Epoch 1 | Step 1170 | Loss: 1.4643 | LR: 1.00e-04 +[2026-04-25 20:18:43] Epoch 1 | Step 1180 | Loss: 1.4654 | LR: 1.00e-04 +[2026-04-25 20:18:45] Epoch 1 | Step 1190 | Loss: 1.4673 | LR: 1.00e-04 +[2026-04-25 20:18:48] Epoch 1 | Step 1200 | Loss: 1.4685 | LR: 1.00e-04 +[2026-04-25 20:18:50] Epoch 1 | Step 1210 | Loss: 1.4686 | LR: 1.00e-04 +[2026-04-25 20:18:53] Epoch 1 | Step 1220 | Loss: 1.4682 | LR: 1.00e-04 +[2026-04-25 20:18:55] Epoch 1 | Step 1230 | Loss: 1.4702 | LR: 1.00e-04 +[2026-04-25 20:18:58] Epoch 1 | Step 1240 | Loss: 1.4718 | LR: 1.00e-04 +[2026-04-25 20:19:01] Epoch 1 | Step 1250 | Loss: 1.4726 | LR: 1.00e-04 +[2026-04-25 20:19:03] Epoch 1 | Step 1260 | Loss: 1.4732 | LR: 1.00e-04 +[2026-04-25 20:19:06] Epoch 1 | Step 1270 | Loss: 1.4728 | LR: 1.00e-04 +[2026-04-25 20:19:08] Epoch 1 | Step 1280 | Loss: 1.4744 | LR: 1.00e-04 +[2026-04-25 20:19:11] Epoch 1 | Step 1290 | Loss: 1.4765 | LR: 1.00e-04 +[2026-04-25 20:19:13] Epoch 1 | Step 1300 | Loss: 1.4774 | LR: 1.00e-04 +[2026-04-25 20:19:16] Epoch 1 | Step 1310 | Loss: 1.4791 | LR: 1.00e-04 +[2026-04-25 20:19:19] Epoch 1 | Step 1320 | Loss: 1.4803 | LR: 1.00e-04 +[2026-04-25 20:19:21] Epoch 1 | Step 1330 | Loss: 1.4807 | LR: 1.00e-04 +[2026-04-25 20:19:24] Epoch 1 | Step 1340 | Loss: 1.4818 | LR: 1.00e-04 +[2026-04-25 20:19:26] Epoch 1 | Step 1350 | Loss: 1.4838 | LR: 1.00e-04 +[2026-04-25 20:19:29] Epoch 1 | Step 1360 | Loss: 1.4849 | LR: 1.00e-04 +[2026-04-25 20:19:31] Epoch 1 | Step 1370 | Loss: 1.4851 | LR: 1.00e-04 +[2026-04-25 20:19:34] Epoch 1 | Step 1380 | Loss: 1.4870 | LR: 1.00e-04 +[2026-04-25 20:19:37] Epoch 1 | Step 1390 | Loss: 1.4888 | LR: 1.00e-04 +[2026-04-25 20:19:39] Epoch 1 | Step 1400 | Loss: 1.4898 | LR: 1.00e-04 +[2026-04-25 20:19:42] Epoch 1 | Step 1410 | Loss: 1.4893 | LR: 1.00e-04 +[2026-04-25 20:19:44] Epoch 1 | Step 1420 | Loss: 1.4903 | LR: 1.00e-04 +[2026-04-25 20:19:47] Epoch 1 | Step 1430 | Loss: 1.4912 | LR: 1.00e-04 +[2026-04-25 20:19:49] Epoch 1 | Step 1440 | Loss: 1.4920 | LR: 1.00e-04 +[2026-04-25 20:19:52] Epoch 1 | Step 1450 | Loss: 1.4931 | LR: 1.00e-04 +[2026-04-25 20:19:54] Epoch 1 | Step 1460 | Loss: 1.4931 | LR: 1.00e-04 +[2026-04-25 20:19:57] Epoch 1 | Step 1470 | Loss: 1.4947 | LR: 1.00e-04 +[2026-04-25 20:20:00] Epoch 1 | Step 1480 | Loss: 1.4959 | LR: 1.00e-04 +[2026-04-25 20:20:02] Epoch 1 | Step 1490 | Loss: 1.4975 | LR: 1.00e-04 +[2026-04-25 20:20:05] Epoch 1 | Step 1500 | Loss: 1.4981 | LR: 1.00e-04 +[2026-04-25 20:20:07] Epoch 1 | Step 1510 | Loss: 1.4990 | LR: 1.00e-04 +[2026-04-25 20:20:10] Epoch 1 | Step 1520 | Loss: 1.5006 | LR: 1.00e-04 +[2026-04-25 20:20:12] Epoch 1 | Step 1530 | Loss: 1.5010 | LR: 1.00e-04 +[2026-04-25 20:20:15] Epoch 1 | Step 1540 | Loss: 1.5027 | LR: 1.00e-04 +[2026-04-25 20:20:17] Epoch 1 | Step 1550 | Loss: 1.5037 | LR: 1.00e-04 +[2026-04-25 20:20:20] Epoch 1 | Step 1560 | Loss: 1.5039 | LR: 1.00e-04 +[2026-04-25 20:20:22] Epoch 1 | Step 1570 | Loss: 1.5059 | LR: 1.00e-04 +[2026-04-25 20:20:25] Epoch 1 | Step 1580 | Loss: 1.5061 | LR: 1.00e-04 +[2026-04-25 20:20:27] Epoch 1 | Step 1590 | Loss: 1.5067 | LR: 1.00e-04 +[2026-04-25 20:20:30] Epoch 1 | Step 1600 | Loss: 1.5071 | LR: 1.00e-04 +[2026-04-25 20:20:32] Epoch 1 | Step 1610 | Loss: 1.5065 | LR: 1.00e-04 +[2026-04-25 20:20:35] Epoch 1 | Step 1620 | Loss: 1.5059 | LR: 1.00e-04 +[2026-04-25 20:20:37] Epoch 1 | Step 1630 | Loss: 1.5075 | LR: 1.00e-04 +[2026-04-25 20:20:40] Epoch 1 | Step 1640 | Loss: 1.5084 | LR: 1.00e-04 +[2026-04-25 20:20:42] Epoch 1 | Step 1650 | Loss: 1.5082 | LR: 1.00e-04 +[2026-04-25 20:20:44] Epoch 1 | Step 1660 | Loss: 1.5083 | LR: 1.00e-04 +[2026-04-25 20:20:47] Epoch 1 | Step 1670 | Loss: 1.5100 | LR: 1.00e-04 +[2026-04-25 20:20:49] Epoch 1 | Step 1680 | Loss: 1.5108 | LR: 1.00e-04 +[2026-04-25 20:20:52] Epoch 1 | Step 1690 | Loss: 1.5118 | LR: 1.00e-04 +[2026-04-25 20:20:54] Epoch 1 | Step 1700 | Loss: 1.5122 | LR: 1.00e-04 +[2026-04-25 20:20:57] Epoch 1 | Step 1710 | Loss: 1.5127 | LR: 1.00e-04 +[2026-04-25 20:20:59] Epoch 1 | Step 1720 | Loss: 1.5128 | LR: 1.00e-04 +[2026-04-25 20:21:02] Epoch 1 | Step 1730 | Loss: 1.5132 | LR: 1.00e-04 +[2026-04-25 20:21:04] Epoch 1 | Step 1740 | Loss: 1.5139 | LR: 1.00e-04 +[2026-04-25 20:21:07] Epoch 1 | Step 1750 | Loss: 1.5154 | LR: 1.00e-04 +[2026-04-25 20:21:10] Epoch 1 | Step 1760 | Loss: 1.5155 | LR: 1.00e-04 +[2026-04-25 20:21:12] Epoch 1 | Step 1770 | Loss: 1.5169 | LR: 1.00e-04 +[2026-04-25 20:21:15] Epoch 1 | Step 1780 | Loss: 1.5172 | LR: 1.00e-04 +[2026-04-25 20:21:17] Epoch 1 | Step 1790 | Loss: 1.5180 | LR: 1.00e-04 +[2026-04-25 20:21:20] Epoch 1 | Step 1800 | Loss: 1.5179 | LR: 1.00e-04 +[2026-04-25 20:21:22] Epoch 1 | Step 1810 | Loss: 1.5186 | LR: 1.00e-04 +[2026-04-25 20:21:25] Epoch 1 | Step 1820 | Loss: 1.5198 | LR: 1.00e-04 +[2026-04-25 20:21:27] Epoch 1 | Step 1830 | Loss: 1.5205 | LR: 1.00e-04 +[2026-04-25 20:21:30] Epoch 1 | Step 1840 | Loss: 1.5216 | LR: 1.00e-04 +[2026-04-25 20:21:33] Epoch 1 | Step 1850 | Loss: 1.5217 | LR: 1.00e-04 +[2026-04-25 20:21:35] Epoch 1 | Step 1860 | Loss: 1.5223 | LR: 1.00e-04 +[2026-04-25 20:21:38] Epoch 1 | Step 1870 | Loss: 1.5225 | LR: 1.00e-04 +[2026-04-25 20:21:40] Epoch 1 | Step 1880 | Loss: 1.5225 | LR: 1.00e-04 +[2026-04-25 20:21:43] Epoch 1 | Step 1890 | Loss: 1.5236 | LR: 1.00e-04 +[2026-04-25 20:21:46] Epoch 1 | Step 1900 | Loss: 1.5237 | LR: 1.00e-04 +[2026-04-25 20:21:48] Epoch 1 | Step 1910 | Loss: 1.5251 | LR: 1.00e-04 +[2026-04-25 20:21:51] Epoch 1 | Step 1920 | Loss: 1.5261 | LR: 1.00e-04 +[2026-04-25 20:21:53] Epoch 1 | Step 1930 | Loss: 1.5267 | LR: 1.00e-04 +[2026-04-25 20:21:56] Epoch 1 | Step 1940 | Loss: 1.5268 | LR: 1.00e-04 +[2026-04-25 20:21:59] Epoch 1 | Step 1950 | Loss: 1.5263 | LR: 1.00e-04 +[2026-04-25 20:22:01] Epoch 1 | Step 1960 | Loss: 1.5272 | LR: 1.00e-04 +[2026-04-25 20:22:04] Epoch 1 | Step 1970 | Loss: 1.5278 | LR: 1.00e-04 +[2026-04-25 20:22:06] Epoch 1 | Step 1980 | Loss: 1.5290 | LR: 1.00e-04 +[2026-04-25 20:22:08] Epoch 1 | Step 1990 | Loss: 1.5296 | LR: 1.00e-04 +[2026-04-25 20:22:11] Epoch 1 | Step 2000 | Loss: 1.5301 | LR: 1.00e-04 +[2026-04-25 20:22:11] Validation | Batch 10/84 | Loss: 1.5171 +[2026-04-25 20:22:11] Validation | Batch 20/84 | Loss: 1.5820 +[2026-04-25 20:22:12] Validation | Batch 30/84 | Loss: 1.7006 +[2026-04-25 20:22:12] Validation | Batch 40/84 | Loss: 1.6892 +[2026-04-25 20:22:13] Validation | Batch 50/84 | Loss: 1.6662 +[2026-04-25 20:22:13] Validation | Batch 60/84 | Loss: 1.6410 +[2026-04-25 20:22:14] Validation | Batch 70/84 | Loss: 1.6259 +[2026-04-25 20:22:14] Validation | Batch 80/84 | Loss: 1.6316 +[2026-04-25 20:22:14] Validation | Batch 84/84 | Loss: 1.6241 +[2026-04-25 20:22:15] Validation | Loss: 1.6241 | PPL: 5.23 | Time: 3.87s +[2026-04-25 20:22:17] New best model saved! Val loss: 1.6241 +[2026-04-25 20:22:20] Epoch 1 | Step 2010 | Loss: 1.5307 | LR: 1.00e-04 +[2026-04-25 20:22:22] Epoch 1 | Step 2020 | Loss: 1.5311 | LR: 1.00e-04 +[2026-04-25 20:22:25] Epoch 1 | Step 2030 | Loss: 1.5320 | LR: 1.00e-04 +[2026-04-25 20:22:27] Epoch 1 | Step 2040 | Loss: 1.5323 | LR: 1.00e-04 +[2026-04-25 20:22:30] Epoch 1 | Step 2050 | Loss: 1.5330 | LR: 1.00e-04 +[2026-04-25 20:22:33] Epoch 1 | Step 2060 | Loss: 1.5336 | LR: 1.00e-04 +[2026-04-25 20:22:35] Epoch 1 | Step 2070 | Loss: 1.5330 | LR: 1.00e-04 +[2026-04-25 20:22:37] Epoch 1 | Step 2080 | Loss: 1.5334 | LR: 1.00e-04 +[2026-04-25 20:22:40] Epoch 1 | Step 2090 | Loss: 1.5343 | LR: 1.00e-04 +[2026-04-25 20:22:42] Epoch 1 | Step 2100 | Loss: 1.5348 | LR: 1.00e-04 +[2026-04-25 20:22:45] Epoch 1 | Step 2110 | Loss: 1.5352 | LR: 1.00e-04 +[2026-04-25 20:22:48] Epoch 1 | Step 2120 | Loss: 1.5351 | LR: 1.00e-04 +[2026-04-25 20:22:50] Epoch 1 | Step 2130 | Loss: 1.5360 | LR: 1.00e-04 +[2026-04-25 20:22:53] Epoch 1 | Step 2140 | Loss: 1.5362 | LR: 1.00e-04 +[2026-04-25 20:22:55] Epoch 1 | Step 2150 | Loss: 1.5364 | LR: 1.00e-04 +[2026-04-25 20:22:58] Epoch 1 | Step 2160 | Loss: 1.5376 | LR: 1.00e-04 +[2026-04-25 20:23:00] Epoch 1 | Step 2170 | Loss: 1.5378 | LR: 1.00e-04 +[2026-04-25 20:23:02] Epoch 1 | Step 2180 | Loss: 1.5377 | LR: 1.00e-04 +[2026-04-25 20:23:05] Epoch 1 | Step 2190 | Loss: 1.5383 | LR: 1.00e-04 +[2026-04-25 20:23:07] Epoch 1 | Step 2200 | Loss: 1.5383 | LR: 1.00e-04 +[2026-04-25 20:23:10] Epoch 1 | Step 2210 | Loss: 1.5385 | LR: 1.00e-04 +[2026-04-25 20:23:12] Epoch 1 | Step 2220 | Loss: 1.5400 | LR: 1.00e-04 +[2026-04-25 20:23:15] Epoch 1 | Step 2230 | Loss: 1.5411 | LR: 1.00e-04 +[2026-04-25 20:23:17] Epoch 1 | Step 2240 | Loss: 1.5420 | LR: 1.00e-04 +[2026-04-25 20:23:20] Epoch 1 | Step 2250 | Loss: 1.5429 | LR: 1.00e-04 +[2026-04-25 20:23:22] Epoch 1 | Step 2260 | Loss: 1.5429 | LR: 1.00e-04 +[2026-04-25 20:23:25] Epoch 1 | Step 2270 | Loss: 1.5435 | LR: 1.00e-04 +[2026-04-25 20:23:27] Epoch 1 | Step 2280 | Loss: 1.5441 | LR: 1.00e-04 +[2026-04-25 20:23:30] Epoch 1 | Step 2290 | Loss: 1.5455 | LR: 1.00e-04 +[2026-04-25 20:23:32] Epoch 1 | Step 2300 | Loss: 1.5461 | LR: 1.00e-04 +[2026-04-25 20:23:35] Epoch 1 | Step 2310 | Loss: 1.5464 | LR: 1.00e-04 +[2026-04-25 20:23:37] Epoch 1 | Step 2320 | Loss: 1.5469 | LR: 1.00e-04 +[2026-04-25 20:23:40] Epoch 1 | Step 2330 | Loss: 1.5473 | LR: 1.00e-04 +[2026-04-25 20:23:42] Epoch 1 | Step 2340 | Loss: 1.5475 | LR: 1.00e-04 +[2026-04-25 20:23:45] Epoch 1 | Step 2350 | Loss: 1.5475 | LR: 1.00e-04 +[2026-04-25 20:23:47] Epoch 1 | Step 2360 | Loss: 1.5482 | LR: 1.00e-04 +[2026-04-25 20:23:50] Epoch 1 | Step 2370 | Loss: 1.5483 | LR: 1.00e-04 +[2026-04-25 20:23:52] Epoch 1 | Step 2380 | Loss: 1.5485 | LR: 1.00e-04 +[2026-04-25 20:23:55] Epoch 1 | Step 2390 | Loss: 1.5492 | LR: 1.00e-04 +[2026-04-25 20:23:57] Epoch 1 | Step 2400 | Loss: 1.5490 | LR: 1.00e-04 +[2026-04-25 20:24:00] Epoch 1 | Step 2410 | Loss: 1.5502 | LR: 1.00e-04 +[2026-04-25 20:24:02] Epoch 1 | Step 2420 | Loss: 1.5507 | LR: 1.00e-04 +[2026-04-25 20:24:05] Epoch 1 | Step 2430 | Loss: 1.5513 | LR: 1.00e-04 +[2026-04-25 20:24:07] Epoch 1 | Step 2440 | Loss: 1.5511 | LR: 1.00e-04 +[2026-04-25 20:24:10] Epoch 1 | Step 2450 | Loss: 1.5511 | LR: 1.00e-04 +[2026-04-25 20:24:12] Epoch 1 | Step 2460 | Loss: 1.5516 | LR: 1.00e-04 +[2026-04-25 20:24:15] Epoch 1 | Step 2470 | Loss: 1.5521 | LR: 1.00e-04 +[2026-04-25 20:24:18] Epoch 1 | Step 2480 | Loss: 1.5526 | LR: 1.00e-04 +[2026-04-25 20:24:20] Epoch 1 | Step 2490 | Loss: 1.5522 | LR: 1.00e-04 +[2026-04-25 20:24:23] Epoch 1 | Step 2500 | Loss: 1.5523 | LR: 1.00e-04 +[2026-04-25 20:24:25] Epoch 1 | Step 2510 | Loss: 1.5528 | LR: 1.00e-04 +[2026-04-25 20:24:28] Epoch 1 | Step 2520 | Loss: 1.5526 | LR: 1.00e-04 +[2026-04-25 20:24:30] Epoch 1 | Step 2530 | Loss: 1.5525 | LR: 1.00e-04 +[2026-04-25 20:24:33] Epoch 1 | Step 2540 | Loss: 1.5528 | LR: 1.00e-04 +[2026-04-25 20:24:36] Epoch 1 | Step 2550 | Loss: 1.5524 | LR: 1.00e-04 +[2026-04-25 20:24:38] Epoch 1 | Step 2560 | Loss: 1.5531 | LR: 1.00e-04 +[2026-04-25 20:24:41] Epoch 1 | Step 2570 | Loss: 1.5537 | LR: 1.00e-04 +[2026-04-25 20:24:44] Epoch 1 | Step 2580 | Loss: 1.5546 | LR: 1.00e-04 +[2026-04-25 20:24:46] Epoch 1 | Step 2590 | Loss: 1.5551 | LR: 1.00e-04 +[2026-04-25 20:24:49] Epoch 1 | Step 2600 | Loss: 1.5555 | LR: 1.00e-04 +[2026-04-25 20:24:51] Epoch 1 | Step 2610 | Loss: 1.5558 | LR: 1.00e-04 +[2026-04-25 20:24:53] Epoch 1 | Step 2620 | Loss: 1.5558 | LR: 1.00e-04 +[2026-04-25 20:24:56] Epoch 1 | Step 2630 | Loss: 1.5557 | LR: 1.00e-04 +[2026-04-25 20:24:58] Epoch 1 | Step 2640 | Loss: 1.5563 | LR: 1.00e-04 +[2026-04-25 20:25:01] Epoch 1 | Step 2650 | Loss: 1.5562 | LR: 1.00e-04 +[2026-04-25 20:25:04] Epoch 1 | Step 2660 | Loss: 1.5565 | LR: 1.00e-04 +[2026-04-25 20:25:06] Epoch 1 | Step 2670 | Loss: 1.5565 | LR: 1.00e-04 +[2026-04-25 20:25:09] Epoch 1 | Step 2680 | Loss: 1.5565 | LR: 1.00e-04 +[2026-04-25 20:25:11] Epoch 1 | Step 2690 | Loss: 1.5568 | LR: 1.00e-04 +[2026-04-25 20:25:14] Epoch 1 | Step 2700 | Loss: 1.5568 | LR: 1.00e-04 +[2026-04-25 20:25:16] Epoch 1 | Step 2710 | Loss: 1.5566 | LR: 1.00e-04 +[2026-04-25 20:25:19] Epoch 1 | Step 2720 | Loss: 1.5572 | LR: 1.00e-04 +[2026-04-25 20:25:21] Epoch 1 | Step 2730 | Loss: 1.5574 | LR: 1.00e-04 +[2026-04-25 20:25:24] Epoch 1 | Step 2740 | Loss: 1.5581 | LR: 1.00e-04 +[2026-04-25 20:25:26] Epoch 1 | Step 2750 | Loss: 1.5587 | LR: 1.00e-04 +[2026-04-25 20:25:29] Epoch 1 | Step 2760 | Loss: 1.5583 | LR: 1.00e-04 +[2026-04-25 20:25:31] Epoch 1 | Step 2770 | Loss: 1.5584 | LR: 1.00e-04 +[2026-04-25 20:25:34] Epoch 1 | Step 2780 | Loss: 1.5593 | LR: 1.00e-04 +[2026-04-25 20:25:36] Epoch 1 | Step 2790 | Loss: 1.5594 | LR: 1.00e-04 +[2026-04-25 20:25:39] Epoch 1 | Step 2800 | Loss: 1.5594 | LR: 1.00e-04 +[2026-04-25 20:25:41] Epoch 1 | Step 2810 | Loss: 1.5602 | LR: 1.00e-04 +[2026-04-25 20:25:44] Epoch 1 | Step 2820 | Loss: 1.5605 | LR: 1.00e-04 +[2026-04-25 20:25:46] Epoch 1 | Step 2830 | Loss: 1.5604 | LR: 1.00e-04 +[2026-04-25 20:25:49] Epoch 1 | Step 2840 | Loss: 1.5615 | LR: 1.00e-04 +[2026-04-25 20:25:51] Epoch 1 | Step 2850 | Loss: 1.5618 | LR: 1.00e-04 +[2026-04-25 20:25:54] Epoch 1 | Step 2860 | Loss: 1.5620 | LR: 1.00e-04 +[2026-04-25 20:25:56] Epoch 1 | Step 2870 | Loss: 1.5624 | LR: 1.00e-04 +[2026-04-25 20:25:59] Epoch 1 | Step 2880 | Loss: 1.5624 | LR: 1.00e-04 +[2026-04-25 20:26:02] Epoch 1 | Step 2890 | Loss: 1.5625 | LR: 1.00e-04 +[2026-04-25 20:26:04] Epoch 1 | Step 2900 | Loss: 1.5620 | LR: 1.00e-04 +[2026-04-25 20:26:07] Epoch 1 | Step 2910 | Loss: 1.5624 | LR: 1.00e-04 +[2026-04-25 20:26:10] Epoch 1 | Step 2920 | Loss: 1.5628 | LR: 1.00e-04 +[2026-04-25 20:26:12] Epoch 1 | Step 2930 | Loss: 1.5627 | LR: 1.00e-04 +[2026-04-25 20:26:15] Epoch 1 | Step 2940 | Loss: 1.5626 | LR: 1.00e-04 +[2026-04-25 20:26:17] Epoch 1 | Step 2950 | Loss: 1.5634 | LR: 1.00e-04 +[2026-04-25 20:26:20] Epoch 1 | Step 2960 | Loss: 1.5637 | LR: 1.00e-04 +[2026-04-25 20:26:23] Epoch 1 | Step 2970 | Loss: 1.5641 | LR: 1.00e-04 +[2026-04-25 20:26:25] Epoch 1 | Step 2980 | Loss: 1.5642 | LR: 1.00e-04 +[2026-04-25 20:26:28] Epoch 1 | Step 2990 | Loss: 1.5646 | LR: 1.00e-04 +[2026-04-25 20:26:31] Epoch 1 | Step 3000 | Loss: 1.5647 | LR: 1.00e-04 +[2026-04-25 20:26:33] Epoch 1 | Step 3010 | Loss: 1.5650 | LR: 1.00e-04 +[2026-04-25 20:26:36] Epoch 1 | Step 3020 | Loss: 1.5650 | LR: 1.00e-04 +[2026-04-25 20:26:38] Epoch 1 | Step 3030 | Loss: 1.5649 | LR: 1.00e-04 +[2026-04-25 20:26:41] Epoch 1 | Step 3040 | Loss: 1.5644 | LR: 1.00e-04 +[2026-04-25 20:26:43] Epoch 1 | Step 3050 | Loss: 1.5641 | LR: 1.00e-04 +[2026-04-25 20:26:46] Epoch 1 | Step 3060 | Loss: 1.5644 | LR: 1.00e-04 +[2026-04-25 20:26:48] Epoch 1 | Step 3070 | Loss: 1.5644 | LR: 1.00e-04 +[2026-04-25 20:26:51] Epoch 1 | Step 3080 | Loss: 1.5647 | LR: 1.00e-04 +[2026-04-25 20:26:54] Epoch 1 | Step 3090 | Loss: 1.5645 | LR: 1.00e-04 +[2026-04-25 20:26:56] Epoch 1 | Step 3100 | Loss: 1.5646 | LR: 1.00e-04 +[2026-04-25 20:26:58] Epoch 1 | Step 3110 | Loss: 1.5645 | LR: 1.00e-04 +[2026-04-25 20:27:01] Epoch 1 | Step 3120 | Loss: 1.5655 | LR: 1.00e-04 +[2026-04-25 20:27:04] Epoch 1 | Step 3130 | Loss: 1.5655 | LR: 1.00e-04 +[2026-04-25 20:27:06] Epoch 1 | Step 3140 | Loss: 1.5659 | LR: 1.00e-04 +[2026-04-25 20:27:09] Epoch 1 | Step 3150 | Loss: 1.5664 | LR: 1.00e-04 +[2026-04-25 20:27:11] Epoch 1 | Step 3160 | Loss: 1.5666 | LR: 1.00e-04 +[2026-04-25 20:27:14] Epoch 1 | Step 3170 | Loss: 1.5669 | LR: 1.00e-04 +[2026-04-25 20:27:16] Epoch 1 | Step 3180 | Loss: 1.5673 | LR: 1.00e-04 +[2026-04-25 20:27:19] Epoch 1 | Step 3190 | Loss: 1.5670 | LR: 1.00e-04 +[2026-04-25 20:27:21] Epoch 1 | Step 3200 | Loss: 1.5671 | LR: 1.00e-04 +[2026-04-25 20:27:24] Epoch 1 | Step 3210 | Loss: 1.5670 | LR: 1.00e-04 +[2026-04-25 20:27:26] Epoch 1 | Step 3220 | Loss: 1.5668 | LR: 1.00e-04 +[2026-04-25 20:27:29] Epoch 1 | Step 3230 | Loss: 1.5675 | LR: 1.00e-04 +[2026-04-25 20:27:31] Epoch 1 | Step 3240 | Loss: 1.5673 | LR: 1.00e-04 +[2026-04-25 20:27:33] Epoch 1 | Step 3250 | Loss: 1.5676 | LR: 1.00e-04 +[2026-04-25 20:27:36] Epoch 1 | Step 3260 | Loss: 1.5680 | LR: 1.00e-04 +[2026-04-25 20:27:38] Epoch 1 | Step 3270 | Loss: 1.5682 | LR: 1.00e-04 +[2026-04-25 20:27:41] Epoch 1 | Step 3280 | Loss: 1.5678 | LR: 1.00e-04 +[2026-04-25 20:27:44] Epoch 1 | Step 3290 | Loss: 1.5681 | LR: 1.00e-04 +[2026-04-25 20:27:46] Epoch 1 | Step 3300 | Loss: 1.5686 | LR: 1.00e-04 +[2026-04-25 20:27:49] Epoch 1 | Step 3310 | Loss: 1.5687 | LR: 1.00e-04 +[2026-04-25 20:27:51] Epoch 1 | Step 3320 | Loss: 1.5691 | LR: 1.00e-04 +[2026-04-25 20:27:54] Epoch 1 | Step 3330 | Loss: 1.5691 | LR: 1.00e-04 +[2026-04-25 20:27:57] Epoch 1 | Step 3340 | Loss: 1.5695 | LR: 1.00e-04 +[2026-04-25 20:27:59] Epoch 1 | Step 3350 | Loss: 1.5691 | LR: 1.00e-04 +[2026-04-25 20:28:02] Epoch 1 | Step 3360 | Loss: 1.5694 | LR: 1.00e-04 +[2026-04-25 20:28:04] Epoch 1 | Step 3370 | Loss: 1.5699 | LR: 1.00e-04 +[2026-04-25 20:28:07] Epoch 1 | Step 3380 | Loss: 1.5699 | LR: 1.00e-04 +[2026-04-25 20:28:10] Epoch 1 | Step 3390 | Loss: 1.5703 | LR: 1.00e-04 +[2026-04-25 20:28:12] Epoch 1 | Step 3400 | Loss: 1.5709 | LR: 1.00e-04 +[2026-04-25 20:28:15] Epoch 1 | Step 3410 | Loss: 1.5709 | LR: 1.00e-04 +[2026-04-25 20:28:18] Epoch 1 | Step 3420 | Loss: 1.5706 | LR: 1.00e-04 +[2026-04-25 20:28:20] Epoch 1 | Step 3430 | Loss: 1.5710 | LR: 1.00e-04 +[2026-04-25 20:28:23] Epoch 1 | Step 3440 | Loss: 1.5715 | LR: 1.00e-04 +[2026-04-25 20:28:25] Epoch 1 | Step 3450 | Loss: 1.5715 | LR: 1.00e-04 +[2026-04-25 20:28:28] Epoch 1 | Step 3460 | Loss: 1.5717 | LR: 1.00e-04 +[2026-04-25 20:28:31] Epoch 1 | Step 3470 | Loss: 1.5719 | LR: 1.00e-04 +[2026-04-25 20:28:33] Epoch 1 | Step 3480 | Loss: 1.5719 | LR: 1.00e-04 +[2026-04-25 20:28:35] Epoch 1 | Step 3490 | Loss: 1.5718 | LR: 1.00e-04 +[2026-04-25 20:28:38] Epoch 1 | Step 3500 | Loss: 1.5716 | LR: 1.00e-04 +[2026-04-25 20:28:41] Epoch 1 | Step 3510 | Loss: 1.5723 | LR: 1.00e-04 +[2026-04-25 20:28:43] Epoch 1 | Step 3520 | Loss: 1.5721 | LR: 1.00e-04 +[2026-04-25 20:28:46] Epoch 1 | Step 3530 | Loss: 1.5726 | LR: 1.00e-04 +[2026-04-25 20:28:48] Epoch 1 | Step 3540 | Loss: 1.5724 | LR: 1.00e-04 +[2026-04-25 20:28:51] Epoch 1 | Step 3550 | Loss: 1.5723 | LR: 1.00e-04 +[2026-04-25 20:28:54] Epoch 1 | Step 3560 | Loss: 1.5724 | LR: 1.00e-04 +[2026-04-25 20:28:56] Epoch 1 | Step 3570 | Loss: 1.5723 | LR: 1.00e-04 +[2026-04-25 20:28:59] Epoch 1 | Step 3580 | Loss: 1.5725 | LR: 1.00e-04 +[2026-04-25 20:29:01] Epoch 1 | Step 3590 | Loss: 1.5727 | LR: 1.00e-04 +[2026-04-25 20:29:04] Epoch 1 | Step 3600 | Loss: 1.5727 | LR: 1.00e-04 +[2026-04-25 20:29:06] Epoch 1 | Step 3610 | Loss: 1.5727 | LR: 1.00e-04 +[2026-04-25 20:29:09] Epoch 1 | Step 3620 | Loss: 1.5729 | LR: 1.00e-04 +[2026-04-25 20:29:11] Epoch 1 | Step 3630 | Loss: 1.5735 | LR: 1.00e-04 +[2026-04-25 20:29:14] Epoch 1 | Step 3640 | Loss: 1.5739 | LR: 1.00e-04 +[2026-04-25 20:29:16] Epoch 1 | Step 3650 | Loss: 1.5743 | LR: 1.00e-04 +[2026-04-25 20:29:19] Epoch 1 | Step 3660 | Loss: 1.5740 | LR: 1.00e-04 +[2026-04-25 20:29:21] Epoch 1 | Step 3670 | Loss: 1.5740 | LR: 1.00e-04 +[2026-04-25 20:29:24] Epoch 1 | Step 3680 | Loss: 1.5743 | LR: 1.00e-04 +[2026-04-25 20:29:26] Epoch 1 | Step 3690 | Loss: 1.5743 | LR: 1.00e-04 +[2026-04-25 20:29:29] Epoch 1 | Step 3700 | Loss: 1.5742 | LR: 1.00e-04 +[2026-04-25 20:29:31] Epoch 1 | Step 3710 | Loss: 1.5744 | LR: 1.00e-04 +[2026-04-25 20:29:34] Epoch 1 | Step 3720 | Loss: 1.5747 | LR: 1.00e-04 +[2026-04-25 20:29:36] Epoch 1 | Step 3730 | Loss: 1.5748 | LR: 1.00e-04 +[2026-04-25 20:29:39] Epoch 1 | Step 3740 | Loss: 1.5751 | LR: 1.00e-04 +[2026-04-25 20:29:41] Epoch 1 | Step 3750 | Loss: 1.5749 | LR: 1.00e-04 +[2026-04-25 20:29:44] Epoch 1 | Step 3760 | Loss: 1.5752 | LR: 1.00e-04 +[2026-04-25 20:29:47] Epoch 1 | Step 3770 | Loss: 1.5755 | LR: 1.00e-04 +[2026-04-25 20:29:49] Epoch 1 | Step 3780 | Loss: 1.5758 | LR: 1.00e-04 +[2026-04-25 20:29:52] Epoch 1 | Step 3790 | Loss: 1.5760 | LR: 1.00e-04 +[2026-04-25 20:29:54] Epoch 1 | Step 3800 | Loss: 1.5764 | LR: 1.00e-04 +[2026-04-25 20:29:57] Epoch 1 | Step 3810 | Loss: 1.5759 | LR: 1.00e-04 +[2026-04-25 20:29:59] Epoch 1 | Step 3820 | Loss: 1.5757 | LR: 1.00e-04 +[2026-04-25 20:30:02] Epoch 1 | Step 3830 | Loss: 1.5759 | LR: 1.00e-04 +[2026-04-25 20:30:04] Epoch 1 | Step 3840 | Loss: 1.5763 | LR: 1.00e-04 +[2026-04-25 20:30:07] Epoch 1 | Step 3850 | Loss: 1.5760 | LR: 1.00e-04 +[2026-04-25 20:30:10] Epoch 1 | Step 3860 | Loss: 1.5761 | LR: 1.00e-04 +[2026-04-25 20:30:12] Epoch 1 | Step 3870 | Loss: 1.5763 | LR: 1.00e-04 +[2026-04-25 20:30:15] Epoch 1 | Step 3880 | Loss: 1.5761 | LR: 1.00e-04 +[2026-04-25 20:30:18] Epoch 1 | Step 3890 | Loss: 1.5762 | LR: 1.00e-04 +[2026-04-25 20:30:20] Epoch 1 | Step 3900 | Loss: 1.5763 | LR: 1.00e-04 +[2026-04-25 20:30:23] Epoch 1 | Step 3910 | Loss: 1.5766 | LR: 1.00e-04 +[2026-04-25 20:30:25] Epoch 1 | Step 3920 | Loss: 1.5770 | LR: 1.00e-04 +[2026-04-25 20:30:28] Epoch 1 | Step 3930 | Loss: 1.5771 | LR: 1.00e-04 +[2026-04-25 20:30:30] Epoch 1 | Step 3940 | Loss: 1.5772 | LR: 1.00e-04 +[2026-04-25 20:30:33] Epoch 1 | Step 3950 | Loss: 1.5771 | LR: 1.00e-04 +[2026-04-25 20:30:36] Epoch 1 | Step 3960 | Loss: 1.5774 | LR: 1.00e-04 +[2026-04-25 20:30:38] Epoch 1 | Step 3970 | Loss: 1.5774 | LR: 9.99e-05 +[2026-04-25 20:30:41] Epoch 1 | Step 3980 | Loss: 1.5776 | LR: 9.99e-05 +[2026-04-25 20:30:43] Epoch 1 | Step 3990 | Loss: 1.5776 | LR: 9.97e-05 +[2026-04-25 20:30:46] Epoch 1 | Step 4000 | Loss: 1.5778 | LR: 9.95e-05 +[2026-04-25 20:30:46] Validation | Batch 10/84 | Loss: 1.5538 +[2026-04-25 20:30:47] Validation | Batch 20/84 | Loss: 1.5837 +[2026-04-25 20:30:47] Validation | Batch 30/84 | Loss: 1.6910 +[2026-04-25 20:30:48] Validation | Batch 40/84 | Loss: 1.6895 +[2026-04-25 20:30:48] Validation | Batch 50/84 | Loss: 1.6701 +[2026-04-25 20:30:49] Validation | Batch 60/84 | Loss: 1.6431 +[2026-04-25 20:30:49] Validation | Batch 70/84 | Loss: 1.6177 +[2026-04-25 20:30:49] Validation | Batch 80/84 | Loss: 1.6208 +[2026-04-25 20:30:50] Validation | Batch 84/84 | Loss: 1.6048 +[2026-04-25 20:30:50] Validation | Loss: 1.6048 | PPL: 5.10 | Time: 3.82s +[2026-04-25 20:30:53] New best model saved! Val loss: 1.6048 +[2026-04-25 20:30:55] Epoch 1 | Step 4010 | Loss: 1.5777 | LR: 9.93e-05 +[2026-04-25 20:30:57] Epoch 1 | Step 4020 | Loss: 1.5780 | LR: 9.90e-05 +[2026-04-25 20:31:00] Epoch 1 | Step 4030 | Loss: 1.5778 | LR: 9.87e-05 +[2026-04-25 20:31:02] Epoch 1 | Step 4040 | Loss: 1.5774 | LR: 9.84e-05 +[2026-04-25 20:31:05] Epoch 1 | Step 4050 | Loss: 1.5774 | LR: 9.80e-05 +[2026-04-25 20:31:07] Epoch 1 | Step 4060 | Loss: 1.5768 | LR: 9.75e-05 +[2026-04-25 20:31:10] Epoch 1 | Step 4070 | Loss: 1.5771 | LR: 9.70e-05 +[2026-04-25 20:31:12] Epoch 1 | Step 4080 | Loss: 1.5774 | LR: 9.65e-05 +[2026-04-25 20:31:15] Epoch 1 | Step 4090 | Loss: 1.5776 | LR: 9.59e-05 +[2026-04-25 20:31:17] Epoch 1 | Step 4100 | Loss: 1.5777 | LR: 9.53e-05 +[2026-04-25 20:31:20] Epoch 1 | Step 4110 | Loss: 1.5778 | LR: 9.46e-05 +[2026-04-25 20:31:22] Epoch 1 | Step 4120 | Loss: 1.5781 | LR: 9.40e-05 +[2026-04-25 20:31:25] Epoch 1 | Step 4130 | Loss: 1.5780 | LR: 9.32e-05 +[2026-04-25 20:31:28] Epoch 1 | Step 4140 | Loss: 1.5782 | LR: 9.24e-05 +[2026-04-25 20:31:31] Epoch 1 | Step 4150 | Loss: 1.5789 | LR: 9.16e-05 +[2026-04-25 20:31:33] Epoch 1 | Step 4160 | Loss: 1.5795 | LR: 9.08e-05 +[2026-04-25 20:31:36] Epoch 1 | Step 4170 | Loss: 1.5796 | LR: 8.99e-05 +[2026-04-25 20:31:38] Epoch 1 | Step 4180 | Loss: 1.5796 | LR: 8.90e-05 +[2026-04-25 20:31:41] Epoch 1 | Step 4190 | Loss: 1.5796 | LR: 8.80e-05 +[2026-04-25 20:31:44] Epoch 1 | Step 4200 | Loss: 1.5803 | LR: 8.70e-05 +[2026-04-25 20:31:46] Epoch 1 | Step 4210 | Loss: 1.5801 | LR: 8.60e-05 +[2026-04-25 20:31:49] Epoch 1 | Step 4220 | Loss: 1.5806 | LR: 8.50e-05 +[2026-04-25 20:31:52] Epoch 1 | Step 4230 | Loss: 1.5808 | LR: 8.39e-05 +[2026-04-25 20:31:54] Epoch 1 | Step 4240 | Loss: 1.5812 | LR: 8.28e-05 +[2026-04-25 20:31:57] Epoch 1 | Step 4250 | Loss: 1.5813 | LR: 8.16e-05 +[2026-04-25 20:32:00] Epoch 1 | Step 4260 | Loss: 1.5809 | LR: 8.05e-05 +[2026-04-25 20:32:02] Epoch 1 | Step 4270 | Loss: 1.5812 | LR: 7.93e-05 +[2026-04-25 20:32:04] Epoch 1 | Step 4280 | Loss: 1.5811 | LR: 7.81e-05 +[2026-04-25 20:32:07] Epoch 1 | Step 4290 | Loss: 1.5810 | LR: 7.68e-05 +[2026-04-25 20:32:10] Epoch 1 | Step 4300 | Loss: 1.5810 | LR: 7.56e-05 +[2026-04-25 20:32:12] Epoch 1 | Step 4310 | Loss: 1.5812 | LR: 7.43e-05 +[2026-04-25 20:32:15] Epoch 1 | Step 4320 | Loss: 1.5812 | LR: 7.30e-05 +[2026-04-25 20:32:17] Epoch 1 | Step 4330 | Loss: 1.5812 | LR: 7.16e-05 +[2026-04-25 20:32:20] Epoch 1 | Step 4340 | Loss: 1.5808 | LR: 7.03e-05 +[2026-04-25 20:32:22] Epoch 1 | Step 4350 | Loss: 1.5806 | LR: 6.90e-05 +[2026-04-25 20:32:25] Epoch 1 | Step 4360 | Loss: 1.5804 | LR: 6.76e-05 +[2026-04-25 20:32:27] Epoch 1 | Step 4370 | Loss: 1.5804 | LR: 6.62e-05 +[2026-04-25 20:32:30] Epoch 1 | Step 4380 | Loss: 1.5802 | LR: 6.48e-05 +[2026-04-25 20:32:32] Epoch 1 | Step 4390 | Loss: 1.5803 | LR: 6.34e-05 +[2026-04-25 20:32:35] Epoch 1 | Step 4400 | Loss: 1.5800 | LR: 6.20e-05 +[2026-04-25 20:32:37] Epoch 1 | Step 4410 | Loss: 1.5795 | LR: 6.06e-05 +[2026-04-25 20:32:40] Epoch 1 | Step 4420 | Loss: 1.5798 | LR: 5.92e-05 +[2026-04-25 20:32:42] Epoch 1 | Step 4430 | Loss: 1.5796 | LR: 5.78e-05 +[2026-04-25 20:32:45] Epoch 1 | Step 4440 | Loss: 1.5799 | LR: 5.63e-05 +[2026-04-25 20:32:47] Epoch 1 | Step 4450 | Loss: 1.5797 | LR: 5.49e-05 +[2026-04-25 20:32:50] Epoch 1 | Step 4460 | Loss: 1.5801 | LR: 5.35e-05 +[2026-04-25 20:32:53] Epoch 1 | Step 4470 | Loss: 1.5797 | LR: 5.20e-05 +[2026-04-25 20:32:55] Epoch 1 | Step 4480 | Loss: 1.5792 | LR: 5.06e-05 +[2026-04-25 20:32:58] Epoch 1 | Step 4490 | Loss: 1.5790 | LR: 4.92e-05 +[2026-04-25 20:33:00] Epoch 1 | Step 4500 | Loss: 1.5789 | LR: 4.78e-05 +[2026-04-25 20:33:03] Epoch 1 | Step 4510 | Loss: 1.5783 | LR: 4.64e-05 +[2026-04-25 20:33:05] Epoch 1 | Step 4520 | Loss: 1.5778 | LR: 4.50e-05 +[2026-04-25 20:33:08] Epoch 1 | Step 4530 | Loss: 1.5774 | LR: 4.36e-05 +[2026-04-25 20:33:11] Epoch 1 | Step 4540 | Loss: 1.5771 | LR: 4.22e-05 +[2026-04-25 20:33:14] Epoch 1 | Step 4550 | Loss: 1.5766 | LR: 4.08e-05 +[2026-04-25 20:33:16] Epoch 1 | Step 4560 | Loss: 1.5764 | LR: 3.95e-05 +[2026-04-25 20:33:19] Epoch 1 | Step 4570 | Loss: 1.5762 | LR: 3.82e-05 +[2026-04-25 20:33:21] Epoch 1 | Step 4580 | Loss: 1.5759 | LR: 3.68e-05 +[2026-04-25 20:33:24] Epoch 1 | Step 4590 | Loss: 1.5756 | LR: 3.55e-05 +[2026-04-25 20:33:26] Epoch 1 | Step 4600 | Loss: 1.5753 | LR: 3.43e-05 +[2026-04-25 20:33:29] Epoch 1 | Step 4610 | Loss: 1.5747 | LR: 3.30e-05 +[2026-04-25 20:33:31] Epoch 1 | Step 4620 | Loss: 1.5747 | LR: 3.18e-05 +[2026-04-25 20:33:34] Epoch 1 | Step 4630 | Loss: 1.5744 | LR: 3.05e-05 +[2026-04-25 20:33:36] Epoch 1 | Step 4640 | Loss: 1.5742 | LR: 2.94e-05 +[2026-04-25 20:33:39] Epoch 1 | Step 4650 | Loss: 1.5740 | LR: 2.82e-05 +[2026-04-25 20:33:41] Epoch 1 | Step 4660 | Loss: 1.5736 | LR: 2.71e-05 +[2026-04-25 20:33:44] Epoch 1 | Step 4670 | Loss: 1.5732 | LR: 2.60e-05 +[2026-04-25 20:33:46] Epoch 1 | Step 4680 | Loss: 1.5731 | LR: 2.49e-05 +[2026-04-25 20:33:49] Epoch 1 | Step 4690 | Loss: 1.5728 | LR: 2.38e-05 +[2026-04-25 20:33:51] Epoch 1 | Step 4700 | Loss: 1.5726 | LR: 2.28e-05 +[2026-04-25 20:33:54] Epoch 1 | Step 4710 | Loss: 1.5722 | LR: 2.18e-05 +[2026-04-25 20:33:57] Epoch 1 | Step 4720 | Loss: 1.5719 | LR: 2.09e-05 +[2026-04-25 20:33:59] Epoch 1 | Step 4730 | Loss: 1.5718 | LR: 2.00e-05 +[2026-04-25 20:34:01] Epoch 1 | Step 4740 | Loss: 1.5713 | LR: 1.91e-05 +[2026-04-25 20:34:04] Epoch 1 | Step 4750 | Loss: 1.5712 | LR: 1.82e-05 +[2026-04-25 20:34:06] Epoch 1 | Step 4760 | Loss: 1.5708 | LR: 1.74e-05 +[2026-04-25 20:34:09] Epoch 1 | Step 4770 | Loss: 1.5702 | LR: 1.67e-05 +[2026-04-25 20:34:11] Epoch 1 | Step 4780 | Loss: 1.5702 | LR: 1.59e-05 +[2026-04-25 20:34:14] Epoch 1 | Step 4790 | Loss: 1.5699 | LR: 1.52e-05 +[2026-04-25 20:34:17] Epoch 1 | Step 4800 | Loss: 1.5694 | LR: 1.46e-05 +[2026-04-25 20:34:19] Epoch 1 | Step 4810 | Loss: 1.5688 | LR: 1.40e-05 +[2026-04-25 20:34:22] Epoch 1 | Step 4820 | Loss: 1.5683 | LR: 1.34e-05 +[2026-04-25 20:34:24] Epoch 1 | Step 4830 | Loss: 1.5677 | LR: 1.29e-05 +[2026-04-25 20:34:27] Epoch 1 | Step 4840 | Loss: 1.5674 | LR: 1.24e-05 +[2026-04-25 20:34:30] Epoch 1 | Step 4850 | Loss: 1.5676 | LR: 1.20e-05 +[2026-04-25 20:34:32] Epoch 1 | Step 4860 | Loss: 1.5675 | LR: 1.16e-05 +[2026-04-25 20:34:35] Epoch 1 | Step 4870 | Loss: 1.5674 | LR: 1.12e-05 +[2026-04-25 20:34:37] Epoch 1 | Step 4880 | Loss: 1.5671 | LR: 1.09e-05 +[2026-04-25 20:34:40] Epoch 1 | Step 4890 | Loss: 1.5666 | LR: 1.06e-05 +[2026-04-25 20:34:43] Epoch 1 | Step 4900 | Loss: 1.5664 | LR: 1.04e-05 +[2026-04-25 20:34:45] Epoch 1 | Step 4910 | Loss: 1.5660 | LR: 1.03e-05 +[2026-04-25 20:34:48] Epoch 1 | Step 4920 | Loss: 1.5658 | LR: 1.01e-05 +[2026-04-25 20:34:50] Epoch 1 | Step 4930 | Loss: 1.5655 | LR: 1.00e-05 +[2026-04-25 20:34:52] Epoch 1 | Step 4940 | Loss: 1.5652 | LR: 1.00e-05 +[2026-04-25 20:34:55] Epoch 1 | Step 4950 | Loss: 1.5649 | LR: 1.00e-05 +[2026-04-25 20:34:58] Epoch 1 | Step 4960 | Loss: 1.5648 | LR: 1.00e-05 +[2026-04-25 20:35:00] Epoch 1 | Step 4970 | Loss: 1.5643 | LR: 1.00e-05 +[2026-04-25 20:35:02] Epoch 1 | Step 4980 | Loss: 1.5642 | LR: 1.00e-05 +[2026-04-25 20:35:05] Epoch 1 | Step 4990 | Loss: 1.5636 | LR: 1.00e-05 +[2026-04-25 20:35:08] Epoch 1 | Step 5000 | Loss: 1.5636 | LR: 1.00e-05 +[2026-04-25 20:35:10] Epoch 1 | Step 5010 | Loss: 1.5631 | LR: 1.00e-05 +[2026-04-25 20:35:12] Epoch 1 | Step 5020 | Loss: 1.5626 | LR: 1.00e-05 +[2026-04-25 20:35:15] Epoch 1 | Step 5030 | Loss: 1.5623 | LR: 1.00e-05 +[2026-04-25 20:35:17] Epoch 1 | Step 5040 | Loss: 1.5618 | LR: 1.00e-05 +[2026-04-25 20:35:20] Epoch 1 | Step 5050 | Loss: 1.5615 | LR: 1.00e-05 +[2026-04-25 20:35:22] Epoch 1 | Step 5060 | Loss: 1.5612 | LR: 1.00e-05 +[2026-04-25 20:35:25] Epoch 1 | Step 5070 | Loss: 1.5609 | LR: 1.00e-05 +[2026-04-25 20:35:27] Epoch 1 | Step 5080 | Loss: 1.5606 | LR: 1.00e-05 +[2026-04-25 20:35:30] Epoch 1 | Step 5090 | Loss: 1.5604 | LR: 1.00e-05 +[2026-04-25 20:35:33] Epoch 1 | Step 5100 | Loss: 1.5599 | LR: 1.00e-05 +[2026-04-25 20:35:35] Epoch 1 | Step 5110 | Loss: 1.5597 | LR: 1.00e-05 +[2026-04-25 20:35:38] Epoch 1 | Step 5120 | Loss: 1.5597 | LR: 1.00e-05 +[2026-04-25 20:35:41] Epoch 1 | Step 5130 | Loss: 1.5593 | LR: 1.00e-05 +[2026-04-25 20:35:43] Epoch 1 | Step 5140 | Loss: 1.5590 | LR: 1.00e-05 +[2026-04-25 20:35:46] Epoch 1 | Step 5150 | Loss: 1.5586 | LR: 1.00e-05 +[2026-04-25 20:35:48] Epoch 1 | Step 5160 | Loss: 1.5579 | LR: 1.00e-05 +[2026-04-25 20:35:51] Epoch 1 | Step 5170 | Loss: 1.5578 | LR: 1.00e-05 +[2026-04-25 20:35:53] Epoch 1 | Step 5180 | Loss: 1.5575 | LR: 1.00e-05 +[2026-04-25 20:35:56] Epoch 1 | Step 5190 | Loss: 1.5573 | LR: 1.00e-05 +[2026-04-25 20:35:59] Epoch 1 | Step 5200 | Loss: 1.5570 | LR: 1.00e-05 +[2026-04-25 20:36:01] Epoch 1 | Step 5210 | Loss: 1.5567 | LR: 1.00e-05 +[2026-04-25 20:36:04] Epoch 1 | Step 5220 | Loss: 1.5564 | LR: 1.00e-05 +[2026-04-25 20:36:07] Epoch 1 | Step 5230 | Loss: 1.5563 | LR: 1.00e-05 +[2026-04-25 20:36:09] Epoch 1 | Step 5240 | Loss: 1.5560 | LR: 1.00e-05 +[2026-04-25 20:36:12] Epoch 1 | Step 5250 | Loss: 1.5560 | LR: 1.00e-05 +[2026-04-25 20:36:14] Epoch 1 | Step 5260 | Loss: 1.5556 | LR: 1.00e-05 +[2026-04-25 20:36:17] Epoch 1 | Step 5270 | Loss: 1.5553 | LR: 1.00e-05 +[2026-04-25 20:36:20] Epoch 1 | Step 5280 | Loss: 1.5548 | LR: 1.00e-05 +[2026-04-25 20:36:22] Epoch 1 | Step 5290 | Loss: 1.5543 | LR: 1.00e-05 +[2026-04-25 20:36:24] Epoch 1 | Step 5300 | Loss: 1.5541 | LR: 1.00e-05 +[2026-04-25 20:36:27] Epoch 1 | Step 5310 | Loss: 1.5539 | LR: 1.00e-05 +[2026-04-25 20:36:29] Epoch 1 | Step 5320 | Loss: 1.5534 | LR: 1.00e-05 +[2026-04-25 20:36:32] Epoch 1 | Step 5330 | Loss: 1.5532 | LR: 1.00e-05 +[2026-04-25 20:36:34] Epoch 1 | Step 5340 | Loss: 1.5528 | LR: 1.00e-05 +[2026-04-25 20:36:37] Epoch 1 | Step 5350 | Loss: 1.5526 | LR: 1.00e-05 +[2026-04-25 20:36:39] Epoch 1 | Step 5360 | Loss: 1.5526 | LR: 1.00e-05 +[2026-04-25 20:36:42] Epoch 1 | Step 5370 | Loss: 1.5523 | LR: 1.00e-05 +[2026-04-25 20:36:44] Epoch 1 | Step 5380 | Loss: 1.5520 | LR: 1.00e-05 +[2026-04-25 20:36:47] Epoch 1 | Step 5390 | Loss: 1.5516 | LR: 1.00e-05 +[2026-04-25 20:36:49] Epoch 1 | Step 5400 | Loss: 1.5511 | LR: 1.00e-05 +[2026-04-25 20:36:52] Epoch 1 | Step 5410 | Loss: 1.5509 | LR: 1.00e-05 +[2026-04-25 20:36:55] Epoch 1 | Step 5420 | Loss: 1.5505 | LR: 1.00e-05 +[2026-04-25 20:36:57] Epoch 1 | Step 5430 | Loss: 1.5502 | LR: 1.00e-05 +[2026-04-25 20:36:59] Epoch 1 | Step 5440 | Loss: 1.5501 | LR: 1.00e-05 +[2026-04-25 20:37:02] Epoch 1 | Step 5450 | Loss: 1.5501 | LR: 1.00e-05 +[2026-04-25 20:37:05] Epoch 1 | Step 5460 | Loss: 1.5498 | LR: 1.00e-05 +[2026-04-25 20:37:07] Epoch 1 | Step 5470 | Loss: 1.5495 | LR: 1.00e-05 +[2026-04-25 20:37:10] Epoch 1 | Step 5480 | Loss: 1.5492 | LR: 1.00e-05 +[2026-04-25 20:37:12] Epoch 1 | Step 5490 | Loss: 1.5491 | LR: 1.00e-05 +[2026-04-25 20:37:15] Epoch 1 | Step 5500 | Loss: 1.5488 | LR: 1.00e-05 +[2026-04-25 20:37:17] Epoch 1 | Step 5510 | Loss: 1.5488 | LR: 1.00e-05 +[2026-04-25 20:37:20] Epoch 1 | Step 5520 | Loss: 1.5484 | LR: 1.00e-05 +[2026-04-25 20:37:22] Epoch 1 | Step 5530 | Loss: 1.5481 | LR: 1.00e-05 +[2026-04-25 20:37:25] Epoch 1 | Step 5540 | Loss: 1.5475 | LR: 1.00e-05 +[2026-04-25 20:37:27] Epoch 1 | Step 5550 | Loss: 1.5473 | LR: 1.00e-05 +[2026-04-25 20:37:30] Epoch 1 | Step 5560 | Loss: 1.5468 | LR: 1.00e-05 +[2026-04-25 20:37:32] Epoch 1 | Step 5570 | Loss: 1.5468 | LR: 1.00e-05 +[2026-04-25 20:37:35] Epoch 1 | Step 5580 | Loss: 1.5464 | LR: 1.00e-05 +[2026-04-25 20:37:38] Epoch 1 | Step 5590 | Loss: 1.5458 | LR: 1.00e-05 +[2026-04-25 20:37:40] Epoch 1 | Step 5600 | Loss: 1.5459 | LR: 1.00e-05 +[2026-04-25 20:37:43] Epoch 1 | Step 5610 | Loss: 1.5459 | LR: 1.00e-05 +[2026-04-25 20:37:45] Epoch 1 | Step 5620 | Loss: 1.5456 | LR: 1.00e-05 +[2026-04-25 20:37:48] Epoch 1 | Step 5630 | Loss: 1.5454 | LR: 1.00e-05 +[2026-04-25 20:37:50] Epoch 1 | Step 5640 | Loss: 1.5452 | LR: 1.00e-05 +[2026-04-25 20:37:53] Epoch 1 | Step 5650 | Loss: 1.5449 | LR: 1.00e-05 +[2026-04-25 20:37:55] Epoch 1 | Step 5660 | Loss: 1.5444 | LR: 1.00e-05 +[2026-04-25 20:37:58] Epoch 1 | Step 5670 | Loss: 1.5442 | LR: 1.00e-05 +[2026-04-25 20:38:01] Epoch 1 | Step 5680 | Loss: 1.5437 | LR: 1.00e-05 +[2026-04-25 20:38:03] Epoch 1 | Step 5690 | Loss: 1.5436 | LR: 1.00e-05 +[2026-04-25 20:38:05] Epoch 1 | Step 5700 | Loss: 1.5432 | LR: 1.00e-05 +[2026-04-25 20:38:08] Epoch 1 | Step 5710 | Loss: 1.5430 | LR: 1.00e-05 +[2026-04-25 20:38:11] Epoch 1 | Step 5720 | Loss: 1.5428 | LR: 1.00e-05 +[2026-04-25 20:38:13] Epoch 1 | Step 5730 | Loss: 1.5425 | LR: 1.00e-05 +[2026-04-25 20:38:16] Epoch 1 | Step 5740 | Loss: 1.5424 | LR: 1.00e-05 +[2026-04-25 20:38:19] Epoch 1 | Step 5750 | Loss: 1.5422 | LR: 1.00e-05 +[2026-04-25 20:38:21] Epoch 1 | Step 5760 | Loss: 1.5420 | LR: 1.00e-05 +[2026-04-25 20:38:24] Epoch 1 | Step 5770 | Loss: 1.5418 | LR: 1.00e-05 +[2026-04-25 20:38:26] Epoch 1 | Step 5780 | Loss: 1.5414 | LR: 1.00e-05 +[2026-04-25 20:38:29] Epoch 1 | Step 5790 | Loss: 1.5414 | LR: 1.00e-05 +[2026-04-25 20:38:31] Epoch 1 | Step 5800 | Loss: 1.5416 | LR: 1.00e-05 +[2026-04-25 20:38:34] Epoch 1 | Step 5810 | Loss: 1.5414 | LR: 1.00e-05 +[2026-04-25 20:38:37] Epoch 1 | Step 5820 | Loss: 1.5410 | LR: 1.00e-05 +[2026-04-25 20:38:39] Epoch 1 | Step 5830 | Loss: 1.5406 | LR: 1.00e-05 +[2026-04-25 20:38:41] Epoch 1 | Step 5840 | Loss: 1.5405 | LR: 1.00e-05 +[2026-04-25 20:38:44] Epoch 1 | Step 5850 | Loss: 1.5404 | LR: 1.00e-05 +[2026-04-25 20:38:46] Epoch 1 | Step 5860 | Loss: 1.5402 | LR: 1.00e-05 +[2026-04-25 20:38:49] Epoch 1 | Step 5870 | Loss: 1.5400 | LR: 1.00e-05 +[2026-04-25 20:38:52] Epoch 1 | Step 5880 | Loss: 1.5400 | LR: 1.00e-05 +[2026-04-25 20:38:54] Epoch 1 | Step 5890 | Loss: 1.5398 | LR: 1.00e-05 +[2026-04-25 20:38:57] Epoch 1 | Step 5900 | Loss: 1.5394 | LR: 1.00e-05 +[2026-04-25 20:39:00] Epoch 1 | Step 5910 | Loss: 1.5393 | LR: 1.00e-05 +[2026-04-25 20:39:02] Epoch 1 | Step 5920 | Loss: 1.5389 | LR: 1.00e-05 +[2026-04-25 20:39:05] Epoch 1 | Step 5930 | Loss: 1.5389 | LR: 1.00e-05 +[2026-04-25 20:39:08] Epoch 1 | Step 5940 | Loss: 1.5385 | LR: 1.00e-05 +[2026-04-25 20:39:10] Epoch 1 | Step 5950 | Loss: 1.5385 | LR: 1.00e-05 +[2026-04-25 20:39:13] Epoch 1 | Step 5960 | Loss: 1.5383 | LR: 1.00e-05 +[2026-04-25 20:39:15] Epoch 1 | Step 5970 | Loss: 1.5382 | LR: 1.00e-05 +[2026-04-25 20:39:18] Epoch 1 | Step 5980 | Loss: 1.5378 | LR: 1.00e-05 +[2026-04-25 20:39:21] Epoch 1 | Step 5990 | Loss: 1.5379 | LR: 1.00e-05 +[2026-04-25 20:39:23] Epoch 1 | Step 6000 | Loss: 1.5376 | LR: 1.00e-05 +[2026-04-25 20:39:24] Validation | Batch 10/84 | Loss: 1.3357 +[2026-04-25 20:39:24] Validation | Batch 20/84 | Loss: 1.3663 +[2026-04-25 20:39:25] Validation | Batch 30/84 | Loss: 1.4614 +[2026-04-25 20:39:25] Validation | Batch 40/84 | Loss: 1.4637 +[2026-04-25 20:39:25] Validation | Batch 50/84 | Loss: 1.4406 +[2026-04-25 20:39:26] Validation | Batch 60/84 | Loss: 1.4091 +[2026-04-25 20:39:26] Validation | Batch 70/84 | Loss: 1.3889 +[2026-04-25 20:39:27] Validation | Batch 80/84 | Loss: 1.3947 +[2026-04-25 20:39:27] Validation | Batch 84/84 | Loss: 1.3849 +[2026-04-25 20:39:27] Validation | Loss: 1.3849 | PPL: 4.09 | Time: 3.80s +[2026-04-25 20:39:30] New best model saved! Val loss: 1.3849 +[2026-04-25 20:39:32] Epoch 1 | Step 6010 | Loss: 1.5375 | LR: 1.00e-05 +[2026-04-25 20:39:35] Epoch 1 | Step 6020 | Loss: 1.5371 | LR: 1.00e-05 +[2026-04-25 20:39:37] Epoch 1 | Step 6030 | Loss: 1.5371 | LR: 1.00e-05 +[2026-04-25 20:39:40] Epoch 1 | Step 6040 | Loss: 1.5369 | LR: 1.00e-05 +[2026-04-25 20:39:43] Epoch 1 | Step 6050 | Loss: 1.5369 | LR: 1.00e-05 +[2026-04-25 20:39:46] Epoch 1 | Step 6060 | Loss: 1.5366 | LR: 1.00e-05 +[2026-04-25 20:39:48] Epoch 1 | Step 6070 | Loss: 1.5362 | LR: 1.00e-05 +[2026-04-25 20:39:51] Epoch 1 | Step 6080 | Loss: 1.5362 | LR: 1.00e-05 +[2026-04-25 20:39:53] Epoch 1 | Step 6090 | Loss: 1.5361 | LR: 1.00e-05 +[2026-04-25 20:39:56] Epoch 1 | Step 6100 | Loss: 1.5361 | LR: 1.00e-05 +[2026-04-25 20:39:59] Epoch 1 | Step 6110 | Loss: 1.5360 | LR: 1.00e-05 +[2026-04-25 20:40:01] Epoch 1 | Step 6120 | Loss: 1.5357 | LR: 1.00e-05 +[2026-04-25 20:40:04] Epoch 1 | Step 6130 | Loss: 1.5353 | LR: 1.00e-05 +[2026-04-25 20:40:06] Epoch 1 | Step 6140 | Loss: 1.5346 | LR: 1.00e-05 +[2026-04-25 20:40:09] Epoch 1 | Step 6150 | Loss: 1.5344 | LR: 1.00e-05 +[2026-04-25 20:40:11] Epoch 1 | Step 6160 | Loss: 1.5341 | LR: 1.00e-05 +[2026-04-25 20:40:14] Epoch 1 | Step 6170 | Loss: 1.5341 | LR: 1.00e-05 +[2026-04-25 20:40:16] Epoch 1 | Step 6180 | Loss: 1.5337 | LR: 1.00e-05 +[2026-04-25 20:40:19] Epoch 1 | Step 6190 | Loss: 1.5333 | LR: 1.00e-05 +[2026-04-25 20:40:22] Epoch 1 | Step 6200 | Loss: 1.5331 | LR: 1.00e-05 +[2026-04-25 20:40:24] Epoch 1 | Step 6210 | Loss: 1.5330 | LR: 1.00e-05 +[2026-04-25 20:40:27] Epoch 1 | Step 6220 | Loss: 1.5329 | LR: 1.00e-05 +[2026-04-25 20:40:30] Epoch 1 | Step 6230 | Loss: 1.5326 | LR: 1.00e-05 +[2026-04-25 20:40:32] Epoch 1 | Step 6240 | Loss: 1.5324 | LR: 1.00e-05 +[2026-04-25 20:40:35] Epoch 1 | Step 6250 | Loss: 1.5319 | LR: 1.00e-05 +[2026-04-25 20:40:37] Epoch 1 | Step 6260 | Loss: 1.5319 | LR: 1.00e-05 +[2026-04-25 20:40:40] Epoch 1 | Step 6270 | Loss: 1.5316 | LR: 1.00e-05 +[2026-04-25 20:40:42] Epoch 1 | Step 6280 | Loss: 1.5310 | LR: 1.00e-05 +[2026-04-25 20:40:45] Epoch 1 | Step 6290 | Loss: 1.5308 | LR: 1.00e-05 +[2026-04-25 20:40:47] Epoch 1 | Step 6300 | Loss: 1.5306 | LR: 1.00e-05 +[2026-04-25 20:40:50] Epoch 1 | Step 6310 | Loss: 1.5306 | LR: 1.00e-05 +[2026-04-25 20:40:52] Epoch 1 | Step 6320 | Loss: 1.5304 | LR: 1.00e-05 +[2026-04-25 20:40:55] Epoch 1 | Step 6330 | Loss: 1.5306 | LR: 1.00e-05 +[2026-04-25 20:40:58] Epoch 1 | Step 6340 | Loss: 1.5306 | LR: 1.00e-05 +[2026-04-25 20:41:00] Epoch 1 | Step 6350 | Loss: 1.5305 | LR: 1.00e-05 +[2026-04-25 20:41:03] Epoch 1 | Step 6360 | Loss: 1.5305 | LR: 1.00e-05 +[2026-04-25 20:41:05] Epoch 1 | Step 6370 | Loss: 1.5302 | LR: 1.00e-05 +[2026-04-25 20:41:08] Epoch 1 | Step 6380 | Loss: 1.5301 | LR: 1.00e-05 +[2026-04-25 20:41:10] Epoch 1 | Step 6390 | Loss: 1.5297 | LR: 1.00e-05 +[2026-04-25 20:41:13] Epoch 1 | Step 6400 | Loss: 1.5294 | LR: 1.00e-05 +[2026-04-25 20:41:15] Epoch 1 | Step 6410 | Loss: 1.5292 | LR: 1.00e-05 +[2026-04-25 20:41:17] Epoch 1 | Step 6420 | Loss: 1.5289 | LR: 1.00e-05 +[2026-04-25 20:41:20] Epoch 1 | Step 6430 | Loss: 1.5286 | LR: 1.00e-05 +[2026-04-25 20:41:22] Epoch 1 | Step 6440 | Loss: 1.5285 | LR: 1.00e-05 +[2026-04-25 20:41:25] Epoch 1 | Step 6450 | Loss: 1.5283 | LR: 1.00e-05 +[2026-04-25 20:41:27] Epoch 1 | Step 6460 | Loss: 1.5277 | LR: 1.00e-05 +[2026-04-25 20:41:30] Epoch 1 | Step 6470 | Loss: 1.5275 | LR: 1.00e-05 +[2026-04-25 20:41:32] Epoch 1 | Step 6480 | Loss: 1.5275 | LR: 1.00e-05 +[2026-04-25 20:41:35] Epoch 1 | Step 6490 | Loss: 1.5276 | LR: 1.00e-05 +[2026-04-25 20:41:37] Epoch 1 | Step 6500 | Loss: 1.5271 | LR: 1.00e-05 +[2026-04-25 20:41:40] Epoch 1 | Step 6510 | Loss: 1.5267 | LR: 1.00e-05 +[2026-04-25 20:41:42] Epoch 1 | Step 6520 | Loss: 1.5264 | LR: 1.00e-05 +[2026-04-25 20:41:45] Epoch 1 | Step 6530 | Loss: 1.5260 | LR: 1.00e-05 +[2026-04-25 20:41:47] Epoch 1 | Step 6540 | Loss: 1.5257 | LR: 1.00e-05 +[2026-04-25 20:41:50] Epoch 1 | Step 6550 | Loss: 1.5254 | LR: 1.00e-05 +[2026-04-25 20:41:52] Epoch 1 | Step 6560 | Loss: 1.5251 | LR: 1.00e-05 +[2026-04-25 20:41:55] Epoch 1 | Step 6570 | Loss: 1.5250 | LR: 1.00e-05 +[2026-04-25 20:41:57] Epoch 1 | Step 6580 | Loss: 1.5248 | LR: 1.00e-05 +[2026-04-25 20:42:00] Epoch 1 | Step 6590 | Loss: 1.5244 | LR: 1.00e-05 +[2026-04-25 20:42:02] Epoch 1 | Step 6600 | Loss: 1.5242 | LR: 1.00e-05 +[2026-04-25 20:42:05] Epoch 1 | Step 6610 | Loss: 1.5240 | LR: 1.00e-05 +[2026-04-25 20:42:08] Epoch 1 | Step 6620 | Loss: 1.5238 | LR: 1.00e-05 +[2026-04-25 20:42:10] Epoch 1 | Step 6630 | Loss: 1.5234 | LR: 1.00e-05 +[2026-04-25 20:42:13] Epoch 1 | Step 6640 | Loss: 1.5232 | LR: 1.00e-05 +[2026-04-25 20:42:15] Epoch 1 | Step 6650 | Loss: 1.5231 | LR: 1.00e-05 +[2026-04-25 20:42:17] Epoch 1 | Step 6660 | Loss: 1.5227 | LR: 1.00e-05 +[2026-04-25 20:42:20] Epoch 1 | Step 6670 | Loss: 1.5226 | LR: 1.00e-05 +[2026-04-25 20:42:23] Epoch 1 | Step 6680 | Loss: 1.5224 | LR: 1.00e-05 +[2026-04-25 20:42:25] Epoch 1 | Step 6690 | Loss: 1.5223 | LR: 1.00e-05 +[2026-04-25 20:42:28] Epoch 1 | Step 6700 | Loss: 1.5221 | LR: 1.00e-05 +[2026-04-25 20:42:30] Epoch 1 | Step 6710 | Loss: 1.5220 | LR: 1.00e-05 +[2026-04-25 20:42:33] Epoch 1 | Step 6720 | Loss: 1.5218 | LR: 1.00e-05 +[2026-04-25 20:42:35] Epoch 1 | Step 6730 | Loss: 1.5218 | LR: 1.00e-05 +[2026-04-25 20:42:38] Epoch 1 | Step 6740 | Loss: 1.5215 | LR: 1.00e-05 +[2026-04-25 20:42:40] Epoch 1 | Step 6750 | Loss: 1.5212 | LR: 1.00e-05 +[2026-04-25 20:42:43] Epoch 1 | Step 6760 | Loss: 1.5211 | LR: 1.00e-05 +[2026-04-25 20:42:45] Epoch 1 | Step 6770 | Loss: 1.5208 | LR: 1.00e-05 +[2026-04-25 20:42:48] Epoch 1 | Step 6780 | Loss: 1.5207 | LR: 1.00e-05 +[2026-04-25 20:42:50] Epoch 1 | Step 6790 | Loss: 1.5207 | LR: 1.00e-05 +[2026-04-25 20:42:53] Epoch 1 | Step 6800 | Loss: 1.5206 | LR: 1.00e-05 +[2026-04-25 20:42:55] Epoch 1 | Step 6810 | Loss: 1.5205 | LR: 1.00e-05 +[2026-04-25 20:42:58] Epoch 1 | Step 6820 | Loss: 1.5205 | LR: 1.00e-05 +[2026-04-25 20:43:01] Epoch 1 | Step 6830 | Loss: 1.5205 | LR: 1.00e-05 +[2026-04-25 20:43:03] Epoch 1 | Step 6840 | Loss: 1.5205 | LR: 1.00e-05 +[2026-04-25 20:43:06] Epoch 1 | Step 6850 | Loss: 1.5203 | LR: 1.00e-05 +[2026-04-25 20:43:08] Epoch 1 | Step 6860 | Loss: 1.5201 | LR: 1.00e-05 +[2026-04-25 20:43:11] Epoch 1 | Step 6870 | Loss: 1.5199 | LR: 1.00e-05 +[2026-04-25 20:43:13] Epoch 1 | Step 6880 | Loss: 1.5196 | LR: 1.00e-05 +[2026-04-25 20:43:16] Epoch 1 | Step 6890 | Loss: 1.5197 | LR: 1.00e-05 +[2026-04-25 20:43:19] Epoch 1 | Step 6900 | Loss: 1.5195 | LR: 1.00e-05 +[2026-04-25 20:43:21] Epoch 1 | Step 6910 | Loss: 1.5190 | LR: 1.00e-05 +[2026-04-25 20:43:24] Epoch 1 | Step 6920 | Loss: 1.5189 | LR: 1.00e-05 +[2026-04-25 20:43:26] Epoch 1 | Step 6930 | Loss: 1.5190 | LR: 1.00e-05 +[2026-04-25 20:43:29] Epoch 1 | Step 6940 | Loss: 1.5187 | LR: 1.00e-05 +[2026-04-25 20:43:31] Epoch 1 | Step 6950 | Loss: 1.5184 | LR: 1.00e-05 +[2026-04-25 20:43:34] Epoch 1 | Step 6960 | Loss: 1.5183 | LR: 1.00e-05 +[2026-04-25 20:43:36] Epoch 1 | Step 6970 | Loss: 1.5180 | LR: 1.00e-05 +[2026-04-25 20:43:39] Epoch 1 | Step 6980 | Loss: 1.5179 | LR: 1.00e-05 +[2026-04-25 20:43:41] Epoch 1 | Step 6990 | Loss: 1.5174 | LR: 1.00e-05 +[2026-04-25 20:43:44] Epoch 1 | Step 7000 | Loss: 1.5172 | LR: 1.00e-05 +[2026-04-25 20:43:46] Epoch 1 | Step 7010 | Loss: 1.5169 | LR: 1.00e-05 +[2026-04-25 20:43:49] Epoch 1 | Step 7020 | Loss: 1.5169 | LR: 1.00e-05 +[2026-04-25 20:43:51] Epoch 1 | Step 7030 | Loss: 1.5167 | LR: 1.00e-05 +[2026-04-25 20:43:54] Epoch 1 | Step 7040 | Loss: 1.5167 | LR: 1.00e-05 +[2026-04-25 20:43:56] Epoch 1 | Step 7050 | Loss: 1.5164 | LR: 1.00e-05 +[2026-04-25 20:43:59] Epoch 1 | Step 7060 | Loss: 1.5162 | LR: 1.00e-05 +[2026-04-25 20:44:01] Epoch 1 | Step 7070 | Loss: 1.5163 | LR: 1.00e-05 +[2026-04-25 20:44:04] Epoch 1 | Step 7080 | Loss: 1.5159 | LR: 1.00e-05 +[2026-04-25 20:44:06] Epoch 1 | Step 7090 | Loss: 1.5159 | LR: 1.00e-05 +[2026-04-25 20:44:09] Epoch 1 | Step 7100 | Loss: 1.5155 | LR: 1.00e-05 +[2026-04-25 20:44:11] Epoch 1 | Step 7110 | Loss: 1.5152 | LR: 1.00e-05 +[2026-04-25 20:44:14] Epoch 1 | Step 7120 | Loss: 1.5153 | LR: 1.00e-05 +[2026-04-25 20:44:17] Epoch 1 | Step 7130 | Loss: 1.5149 | LR: 1.00e-05 +[2026-04-25 20:44:19] Epoch 1 | Step 7140 | Loss: 1.5146 | LR: 1.00e-05 +[2026-04-25 20:44:21] Epoch 1 | Step 7150 | Loss: 1.5147 | LR: 1.00e-05 +[2026-04-25 20:44:24] Epoch 1 | Step 7160 | Loss: 1.5143 | LR: 1.00e-05 +[2026-04-25 20:44:27] Epoch 1 | Step 7170 | Loss: 1.5141 | LR: 1.00e-05 +[2026-04-25 20:44:29] Epoch 1 | Step 7180 | Loss: 1.5141 | LR: 1.00e-05 +[2026-04-25 20:44:32] Epoch 1 | Step 7190 | Loss: 1.5141 | LR: 1.00e-05 +[2026-04-25 20:44:35] Epoch 1 | Step 7200 | Loss: 1.5139 | LR: 1.00e-05 +[2026-04-25 20:44:37] Epoch 1 | Step 7210 | Loss: 1.5135 | LR: 1.00e-05 +[2026-04-25 20:44:40] Epoch 1 | Step 7220 | Loss: 1.5135 | LR: 1.00e-05 +[2026-04-25 20:44:43] Epoch 1 | Step 7230 | Loss: 1.5134 | LR: 1.00e-05 +[2026-04-25 20:44:45] Epoch 1 | Step 7240 | Loss: 1.5132 | LR: 1.00e-05 +[2026-04-25 20:44:48] Epoch 1 | Step 7250 | Loss: 1.5131 | LR: 1.00e-05 +[2026-04-25 20:44:50] Epoch 1 | Step 7260 | Loss: 1.5128 | LR: 1.00e-05 +[2026-04-25 20:44:53] Epoch 1 | Step 7270 | Loss: 1.5128 | LR: 1.00e-05 +[2026-04-25 20:44:55] Epoch 1 | Step 7280 | Loss: 1.5128 | LR: 1.00e-05 +[2026-04-25 20:44:58] Epoch 1 | Step 7290 | Loss: 1.5124 | LR: 1.00e-05 +[2026-04-25 20:45:00] Epoch 1 | Step 7300 | Loss: 1.5122 | LR: 1.00e-05 +[2026-04-25 20:45:03] Epoch 1 | Step 7310 | Loss: 1.5119 | LR: 1.00e-05 +[2026-04-25 20:45:06] Epoch 1 | Step 7320 | Loss: 1.5115 | LR: 1.00e-05 +[2026-04-25 20:45:08] Epoch 1 | Step 7330 | Loss: 1.5114 | LR: 1.00e-05 +[2026-04-25 20:45:11] Epoch 1 | Step 7340 | Loss: 1.5114 | LR: 1.00e-05 +[2026-04-25 20:45:13] Epoch 1 | Step 7350 | Loss: 1.5114 | LR: 1.00e-05 +[2026-04-25 20:45:16] Epoch 1 | Step 7360 | Loss: 1.5110 | LR: 1.00e-05 +[2026-04-25 20:45:19] Epoch 1 | Step 7370 | Loss: 1.5106 | LR: 1.00e-05 +[2026-04-25 20:45:21] Epoch 1 | Step 7380 | Loss: 1.5103 | LR: 1.00e-05 +[2026-04-25 20:45:24] Epoch 1 | Step 7390 | Loss: 1.5100 | LR: 1.00e-05 +[2026-04-25 20:45:26] Epoch 1 | Step 7400 | Loss: 1.5097 | LR: 1.00e-05 +[2026-04-25 20:45:29] Epoch 1 | Step 7410 | Loss: 1.5098 | LR: 1.00e-05 +[2026-04-25 20:45:31] Epoch 1 | Step 7420 | Loss: 1.5097 | LR: 1.00e-05 +[2026-04-25 20:45:34] Epoch 1 | Step 7430 | Loss: 1.5094 | LR: 1.00e-05 +[2026-04-25 20:45:36] Epoch 1 | Step 7440 | Loss: 1.5093 | LR: 1.00e-05 +[2026-04-25 20:45:39] Epoch 1 | Step 7450 | Loss: 1.5090 | LR: 1.00e-05 +[2026-04-25 20:45:41] Epoch 1 | Step 7460 | Loss: 1.5088 | LR: 1.00e-05 +[2026-04-25 20:45:44] Epoch 1 | Step 7470 | Loss: 1.5087 | LR: 1.00e-05 +[2026-04-25 20:45:46] Epoch 1 | Step 7480 | Loss: 1.5085 | LR: 1.00e-05 +[2026-04-25 20:45:49] Epoch 1 | Step 7490 | Loss: 1.5085 | LR: 1.00e-05 +[2026-04-25 20:45:51] Epoch 1 | Step 7500 | Loss: 1.5085 | LR: 1.00e-05 +[2026-04-25 20:45:54] Epoch 1 | Step 7510 | Loss: 1.5084 | LR: 1.00e-05 +[2026-04-25 20:45:56] Epoch 1 | Step 7520 | Loss: 1.5082 | LR: 1.00e-05 +[2026-04-25 20:45:58] Epoch 1 | Step 7530 | Loss: 1.5080 | LR: 1.00e-05 +[2026-04-25 20:46:01] Epoch 1 | Step 7540 | Loss: 1.5078 | LR: 1.00e-05 +[2026-04-25 20:46:03] Epoch 1 | Step 7550 | Loss: 1.5077 | LR: 1.00e-05 +[2026-04-25 20:46:06] Epoch 1 | Step 7560 | Loss: 1.5075 | LR: 1.00e-05 +[2026-04-25 20:46:08] Epoch 1 | Step 7570 | Loss: 1.5074 | LR: 1.00e-05 +[2026-04-25 20:46:11] Epoch 1 | Step 7580 | Loss: 1.5071 | LR: 1.00e-05 +[2026-04-25 20:46:13] Epoch 1 | Step 7590 | Loss: 1.5069 | LR: 1.00e-05 +[2026-04-25 20:46:16] Epoch 1 | Step 7600 | Loss: 1.5068 | LR: 1.00e-05 +[2026-04-25 20:46:18] Epoch 1 | Step 7610 | Loss: 1.5065 | LR: 1.00e-05 +[2026-04-25 20:46:21] Epoch 1 | Step 7620 | Loss: 1.5062 | LR: 1.00e-05 +[2026-04-25 20:46:23] Epoch 1 | Step 7630 | Loss: 1.5060 | LR: 1.00e-05 +[2026-04-25 20:46:26] Epoch 1 | Step 7640 | Loss: 1.5058 | LR: 1.00e-05 +[2026-04-25 20:46:29] Epoch 1 | Step 7650 | Loss: 1.5054 | LR: 1.00e-05 +[2026-04-25 20:46:31] Epoch 1 | Step 7660 | Loss: 1.5052 | LR: 1.00e-05 +[2026-04-25 20:46:34] Epoch 1 | Step 7670 | Loss: 1.5049 | LR: 1.00e-05 +[2026-04-25 20:46:36] Epoch 1 | Step 7680 | Loss: 1.5047 | LR: 1.00e-05 +[2026-04-25 20:46:39] Epoch 1 | Step 7690 | Loss: 1.5048 | LR: 1.00e-05 +[2026-04-25 20:46:42] Epoch 1 | Step 7700 | Loss: 1.5044 | LR: 1.00e-05 +[2026-04-25 20:46:44] Epoch 1 | Step 7710 | Loss: 1.5040 | LR: 1.00e-05 +[2026-04-25 20:46:47] Epoch 1 | Step 7720 | Loss: 1.5041 | LR: 1.00e-05 +[2026-04-25 20:46:49] Epoch 1 | Step 7730 | Loss: 1.5041 | LR: 1.00e-05 +[2026-04-25 20:46:52] Epoch 1 | Step 7740 | Loss: 1.5041 | LR: 1.00e-05 +[2026-04-25 20:46:55] Epoch 1 | Step 7750 | Loss: 1.5040 | LR: 1.00e-05 +[2026-04-25 20:46:57] Epoch 1 | Step 7760 | Loss: 1.5037 | LR: 1.00e-05 +[2026-04-25 20:47:00] Epoch 1 | Step 7770 | Loss: 1.5035 | LR: 1.00e-05 +[2026-04-25 20:47:02] Epoch 1 | Step 7780 | Loss: 1.5032 | LR: 1.00e-05 +[2026-04-25 20:47:05] Epoch 1 | Step 7790 | Loss: 1.5031 | LR: 1.00e-05 +[2026-04-25 20:47:07] Epoch 1 | Step 7800 | Loss: 1.5028 | LR: 1.00e-05 +[2026-04-25 20:47:10] Epoch 1 | Step 7810 | Loss: 1.5029 | LR: 1.00e-05 +[2026-04-25 20:47:12] Epoch 1 | Step 7820 | Loss: 1.5028 | LR: 1.00e-05 +[2026-04-25 20:47:15] Epoch 1 | Step 7830 | Loss: 1.5027 | LR: 1.00e-05 +[2026-04-25 20:47:18] Epoch 1 | Step 7840 | Loss: 1.5024 | LR: 1.00e-05 +[2026-04-25 20:47:20] Epoch 1 | Step 7850 | Loss: 1.5020 | LR: 1.00e-05 +[2026-04-25 20:47:23] Epoch 1 | Step 7860 | Loss: 1.5019 | LR: 1.00e-05 +[2026-04-25 20:47:25] Epoch 1 | Step 7870 | Loss: 1.5016 | LR: 1.00e-05 +[2026-04-25 20:47:28] Epoch 1 | Step 7880 | Loss: 1.5016 | LR: 1.00e-05 +[2026-04-25 20:47:31] Epoch 1 | Step 7890 | Loss: 1.5013 | LR: 1.00e-05 +[2026-04-25 20:47:34] Epoch 1 | Step 7900 | Loss: 1.5012 | LR: 1.00e-05 +[2026-04-25 20:47:37] Epoch 1 | Step 7910 | Loss: 1.5013 | LR: 1.00e-05 +[2026-04-25 20:47:39] Epoch 1 | Step 7920 | Loss: 1.5011 | LR: 1.00e-05 +[2026-04-25 20:47:42] Epoch 1 | Step 7930 | Loss: 1.5011 | LR: 1.00e-05 +[2026-04-25 20:47:44] Epoch 1 | Step 7940 | Loss: 1.5010 | LR: 1.00e-05 +[2026-04-25 20:47:47] Epoch 1 | Step 7950 | Loss: 1.5010 | LR: 1.00e-05 +[2026-04-25 20:47:49] Epoch 1 | Step 7960 | Loss: 1.5009 | LR: 1.00e-05 +[2026-04-25 20:47:52] Epoch 1 | Step 7970 | Loss: 1.5008 | LR: 1.00e-05 +[2026-04-25 20:47:54] Epoch 1 | Step 7980 | Loss: 1.5005 | LR: 1.00e-05 +[2026-04-25 20:47:57] Epoch 1 | Step 7990 | Loss: 1.5003 | LR: 1.00e-05 +[2026-04-25 20:47:59] Epoch 1 | Step 8000 | Loss: 1.5001 | LR: 1.00e-05 +[2026-04-25 20:48:00] Validation | Batch 10/84 | Loss: 1.3175 +[2026-04-25 20:48:00] Validation | Batch 20/84 | Loss: 1.3445 +[2026-04-25 20:48:01] Validation | Batch 30/84 | Loss: 1.4386 +[2026-04-25 20:48:01] Validation | Batch 40/84 | Loss: 1.4415 +[2026-04-25 20:48:01] Validation | Batch 50/84 | Loss: 1.4190 +[2026-04-25 20:48:02] Validation | Batch 60/84 | Loss: 1.3877 +[2026-04-25 20:48:02] Validation | Batch 70/84 | Loss: 1.3681 +[2026-04-25 20:48:03] Validation | Batch 80/84 | Loss: 1.3739 +[2026-04-25 20:48:03] Validation | Batch 84/84 | Loss: 1.3637 +[2026-04-25 20:48:03] Validation | Loss: 1.3637 | PPL: 4.00 | Time: 3.78s +[2026-04-25 20:48:06] New best model saved! Val loss: 1.3637 +[2026-04-25 20:48:09] Epoch 1 | Step 8010 | Loss: 1.4999 | LR: 1.00e-05 +[2026-04-25 20:48:11] Epoch 1 | Step 8020 | Loss: 1.4995 | LR: 1.00e-05 +[2026-04-25 20:48:14] Epoch 1 | Step 8030 | Loss: 1.4992 | LR: 1.00e-05 +[2026-04-25 20:48:17] Epoch 1 | Step 8040 | Loss: 1.4993 | LR: 1.00e-05 +[2026-04-25 20:48:19] Epoch 1 | Step 8050 | Loss: 1.4990 | LR: 1.00e-05 +[2026-04-25 20:48:22] Epoch 1 | Step 8060 | Loss: 1.4988 | LR: 1.00e-05 +[2026-04-25 20:48:25] Epoch 1 | Step 8070 | Loss: 1.4986 | LR: 1.00e-05 +[2026-04-25 20:48:27] Epoch 1 | Step 8080 | Loss: 1.4985 | LR: 1.00e-05 +[2026-04-25 20:48:30] Epoch 1 | Step 8090 | Loss: 1.4982 | LR: 1.00e-05 +[2026-04-25 20:48:33] Epoch 1 | Step 8100 | Loss: 1.4980 | LR: 1.00e-05 +[2026-04-25 20:48:35] Epoch 1 | Step 8110 | Loss: 1.4981 | LR: 1.00e-05 +[2026-04-25 20:48:38] Epoch 1 | Step 8120 | Loss: 1.4979 | LR: 1.00e-05 +[2026-04-25 20:48:40] Epoch 1 | Step 8130 | Loss: 1.4977 | LR: 1.00e-05 +[2026-04-25 20:48:43] Epoch 1 | Step 8140 | Loss: 1.4977 | LR: 1.00e-05 +[2026-04-25 20:48:45] Epoch 1 | Step 8150 | Loss: 1.4976 | LR: 1.00e-05 +[2026-04-25 20:48:47] Epoch 1 | Step 8160 | Loss: 1.4972 | LR: 1.00e-05 +[2026-04-25 20:48:50] Epoch 1 | Step 8170 | Loss: 1.4971 | LR: 1.00e-05 +[2026-04-25 20:48:53] Epoch 1 | Step 8180 | Loss: 1.4970 | LR: 1.00e-05 +[2026-04-25 20:48:55] Epoch 1 | Step 8190 | Loss: 1.4967 | LR: 1.00e-05 +[2026-04-25 20:48:58] Epoch 1 | Step 8200 | Loss: 1.4967 | LR: 1.00e-05 +[2026-04-25 20:49:00] Epoch 1 | Step 8210 | Loss: 1.4965 | LR: 1.00e-05 +[2026-04-25 20:49:03] Epoch 1 | Step 8220 | Loss: 1.4965 | LR: 1.00e-05 +[2026-04-25 20:49:05] Epoch 1 | Step 8230 | Loss: 1.4964 | LR: 1.00e-05 +[2026-04-25 20:49:08] Epoch 1 | Step 8240 | Loss: 1.4964 | LR: 1.00e-05 +[2026-04-25 20:49:10] Epoch 1 | Step 8250 | Loss: 1.4962 | LR: 1.00e-05 +[2026-04-25 20:49:13] Epoch 1 | Step 8260 | Loss: 1.4962 | LR: 1.00e-05 +[2026-04-25 20:49:15] Epoch 1 | Step 8270 | Loss: 1.4962 | LR: 1.00e-05 +[2026-04-25 20:49:18] Epoch 1 | Step 8280 | Loss: 1.4962 | LR: 1.00e-05 +[2026-04-25 20:49:21] Epoch 1 | Step 8290 | Loss: 1.4962 | LR: 1.00e-05 +[2026-04-25 20:49:23] Epoch 1 | Step 8300 | Loss: 1.4958 | LR: 1.00e-05 +[2026-04-25 20:49:25] Epoch 1 | Step 8310 | Loss: 1.4958 | LR: 1.00e-05 +[2026-04-25 20:49:28] Epoch 1 | Step 8320 | Loss: 1.4957 | LR: 1.00e-05 +[2026-04-25 20:49:30] Epoch 1 | Step 8330 | Loss: 1.4957 | LR: 1.00e-05 +[2026-04-25 20:49:33] Epoch 1 | Step 8340 | Loss: 1.4955 | LR: 1.00e-05 +[2026-04-25 20:49:35] Epoch 1 | Step 8350 | Loss: 1.4953 | LR: 1.00e-05 +[2026-04-25 20:49:38] Epoch 1 | Step 8360 | Loss: 1.4951 | LR: 1.00e-05 +[2026-04-25 20:49:40] Epoch 1 | Step 8370 | Loss: 1.4950 | LR: 1.00e-05 +[2026-04-25 20:49:43] Epoch 1 | Step 8380 | Loss: 1.4948 | LR: 1.00e-05 +[2026-04-25 20:49:45] Epoch 1 | Step 8390 | Loss: 1.4948 | LR: 1.00e-05 +[2026-04-25 20:49:48] Epoch 1 | Step 8400 | Loss: 1.4946 | LR: 1.00e-05 +[2026-04-25 20:49:50] Epoch 1 | Step 8410 | Loss: 1.4945 | LR: 1.00e-05 +[2026-04-25 20:49:53] Epoch 1 | Step 8420 | Loss: 1.4945 | LR: 1.00e-05 +[2026-04-25 20:49:55] Epoch 1 | Step 8430 | Loss: 1.4942 | LR: 1.00e-05 +[2026-04-25 20:49:58] Epoch 1 | Step 8440 | Loss: 1.4941 | LR: 1.00e-05 +[2026-04-25 20:50:00] Epoch 1 | Step 8450 | Loss: 1.4940 | LR: 1.00e-05 +[2026-04-25 20:50:03] Epoch 1 | Step 8460 | Loss: 1.4939 | LR: 1.00e-05 +[2026-04-25 20:50:05] Epoch 1 | Step 8470 | Loss: 1.4937 | LR: 1.00e-05 +[2026-04-25 20:50:08] Epoch 1 | Step 8480 | Loss: 1.4935 | LR: 1.00e-05 +[2026-04-25 20:50:10] Epoch 1 | Step 8490 | Loss: 1.4932 | LR: 1.00e-05 +[2026-04-25 20:50:13] Epoch 1 | Step 8500 | Loss: 1.4931 | LR: 1.00e-05 +[2026-04-25 20:50:15] Epoch 1 | Step 8510 | Loss: 1.4929 | LR: 1.00e-05 +[2026-04-25 20:50:18] Epoch 1 | Step 8520 | Loss: 1.4929 | LR: 1.00e-05 +[2026-04-25 20:50:20] Epoch 1 | Step 8530 | Loss: 1.4927 | LR: 1.00e-05 +[2026-04-25 20:50:23] Epoch 1 | Step 8540 | Loss: 1.4928 | LR: 1.00e-05 +[2026-04-25 20:50:26] Epoch 1 | Step 8550 | Loss: 1.4926 | LR: 1.00e-05 +[2026-04-25 20:50:28] Epoch 1 | Step 8560 | Loss: 1.4925 | LR: 1.00e-05 +[2026-04-25 20:50:30] Epoch 1 | Step 8570 | Loss: 1.4924 | LR: 1.00e-05 +[2026-04-25 20:50:33] Epoch 1 | Step 8580 | Loss: 1.4921 | LR: 1.00e-05 +[2026-04-25 20:50:36] Epoch 1 | Step 8590 | Loss: 1.4918 | LR: 1.00e-05 +[2026-04-25 20:50:38] Epoch 1 | Step 8600 | Loss: 1.4916 | LR: 1.00e-05 +[2026-04-25 20:50:41] Epoch 1 | Step 8610 | Loss: 1.4916 | LR: 1.00e-05 +[2026-04-25 20:50:43] Epoch 1 | Step 8620 | Loss: 1.4914 | LR: 1.00e-05 +[2026-04-25 20:50:46] Epoch 1 | Step 8630 | Loss: 1.4910 | LR: 1.00e-05 +[2026-04-25 20:50:48] Epoch 1 | Step 8640 | Loss: 1.4911 | LR: 1.00e-05 +[2026-04-25 20:50:51] Epoch 1 | Step 8650 | Loss: 1.4910 | LR: 1.00e-05 +[2026-04-25 20:50:53] Epoch 1 | Step 8660 | Loss: 1.4907 | LR: 1.00e-05 +[2026-04-25 20:50:56] Epoch 1 | Step 8670 | Loss: 1.4907 | LR: 1.00e-05 +[2026-04-25 20:50:58] Epoch 1 | Step 8680 | Loss: 1.4907 | LR: 1.00e-05 +[2026-04-25 20:51:01] Epoch 1 | Step 8690 | Loss: 1.4903 | LR: 1.00e-05 +[2026-04-25 20:51:03] Epoch 1 | Step 8700 | Loss: 1.4902 | LR: 1.00e-05 +[2026-04-25 20:51:06] Epoch 1 | Step 8710 | Loss: 1.4899 | LR: 1.00e-05 +[2026-04-25 20:51:09] Epoch 1 | Step 8720 | Loss: 1.4897 | LR: 1.00e-05 +[2026-04-25 20:51:11] Epoch 1 | Step 8730 | Loss: 1.4896 | LR: 1.00e-05 +[2026-04-25 20:51:14] Epoch 1 | Step 8740 | Loss: 1.4897 | LR: 1.00e-05 +[2026-04-25 20:51:16] Epoch 1 | Step 8750 | Loss: 1.4895 | LR: 1.00e-05 +[2026-04-25 20:51:19] Epoch 1 | Step 8760 | Loss: 1.4894 | LR: 1.00e-05 +[2026-04-25 20:51:21] Epoch 1 | Step 8770 | Loss: 1.4891 | LR: 1.00e-05 +[2026-04-25 20:51:24] Epoch 1 | Step 8780 | Loss: 1.4889 | LR: 1.00e-05 +[2026-04-25 20:51:27] Epoch 1 | Step 8790 | Loss: 1.4888 | LR: 1.00e-05 +[2026-04-25 20:51:29] Epoch 1 | Step 8800 | Loss: 1.4885 | LR: 1.00e-05 +[2026-04-25 20:51:32] Epoch 1 | Step 8810 | Loss: 1.4884 | LR: 1.00e-05 +[2026-04-25 20:51:35] Epoch 1 | Step 8820 | Loss: 1.4881 | LR: 1.00e-05 +[2026-04-25 20:51:37] Epoch 1 | Step 8830 | Loss: 1.4881 | LR: 1.00e-05 +[2026-04-25 20:51:40] Epoch 1 | Step 8840 | Loss: 1.4879 | LR: 1.00e-05 +[2026-04-25 20:51:42] Epoch 1 | Step 8850 | Loss: 1.4878 | LR: 1.00e-05 +[2026-04-25 20:51:45] Epoch 1 | Step 8860 | Loss: 1.4877 | LR: 1.00e-05 +[2026-04-25 20:51:47] Epoch 1 | Step 8870 | Loss: 1.4877 | LR: 1.00e-05 +[2026-04-25 20:51:50] Epoch 1 | Step 8880 | Loss: 1.4876 | LR: 1.00e-05 +[2026-04-25 20:51:52] Epoch 1 | Step 8890 | Loss: 1.4874 | LR: 1.00e-05 +[2026-04-25 20:51:55] Epoch 1 | Step 8900 | Loss: 1.4869 | LR: 1.00e-05 +[2026-04-25 20:51:57] Epoch 1 | Step 8910 | Loss: 1.4871 | LR: 1.00e-05 +[2026-04-25 20:52:00] Epoch 1 | Step 8920 | Loss: 1.4866 | LR: 1.00e-05 +[2026-04-25 20:52:02] Epoch 1 | Step 8930 | Loss: 1.4864 | LR: 1.00e-05 +[2026-04-25 20:52:05] Epoch 1 | Step 8940 | Loss: 1.4864 | LR: 1.00e-05 +[2026-04-25 20:52:07] Epoch 1 | Step 8950 | Loss: 1.4864 | LR: 1.00e-05 +[2026-04-25 20:52:10] Epoch 1 | Step 8960 | Loss: 1.4863 | LR: 1.00e-05 +[2026-04-25 20:52:13] Epoch 1 | Step 8970 | Loss: 1.4862 | LR: 1.00e-05 +[2026-04-25 20:52:15] Epoch 1 | Step 8980 | Loss: 1.4860 | LR: 1.00e-05 +[2026-04-25 20:52:18] Epoch 1 | Step 8990 | Loss: 1.4858 | LR: 1.00e-05 +[2026-04-25 20:52:20] Epoch 1 | Step 9000 | Loss: 1.4857 | LR: 1.00e-05 +[2026-04-25 20:52:23] Epoch 1 | Step 9010 | Loss: 1.4857 | LR: 1.00e-05 +[2026-04-25 20:52:26] Epoch 1 | Step 9020 | Loss: 1.4857 | LR: 1.00e-05 +[2026-04-25 20:52:28] Epoch 1 | Step 9030 | Loss: 1.4856 | LR: 1.00e-05 +[2026-04-25 20:52:30] Epoch 1 | Step 9040 | Loss: 1.4854 | LR: 1.00e-05 +[2026-04-25 20:52:33] Epoch 1 | Step 9050 | Loss: 1.4852 | LR: 1.00e-05 +[2026-04-25 20:52:35] Epoch 1 | Step 9060 | Loss: 1.4852 | LR: 1.00e-05 +[2026-04-25 20:52:38] Epoch 1 | Step 9070 | Loss: 1.4849 | LR: 1.00e-05 +[2026-04-25 20:52:40] Epoch 1 | Step 9080 | Loss: 1.4848 | LR: 1.00e-05 +[2026-04-25 20:52:43] Epoch 1 | Step 9090 | Loss: 1.4846 | LR: 1.00e-05 +[2026-04-25 20:52:45] Epoch 1 | Step 9100 | Loss: 1.4846 | LR: 1.00e-05 +[2026-04-25 20:52:48] Epoch 1 | Step 9110 | Loss: 1.4845 | LR: 1.00e-05 +[2026-04-25 20:52:50] Epoch 1 | Step 9120 | Loss: 1.4845 | LR: 1.00e-05 +[2026-04-25 20:52:53] Epoch 1 | Step 9130 | Loss: 1.4843 | LR: 1.00e-05 +[2026-04-25 20:52:55] Epoch 1 | Step 9140 | Loss: 1.4842 | LR: 1.00e-05 +[2026-04-25 20:52:57] Epoch 1 | Step 9150 | Loss: 1.4842 | LR: 1.00e-05 +[2026-04-25 20:53:00] Epoch 1 | Step 9160 | Loss: 1.4840 | LR: 1.00e-05 +[2026-04-25 20:53:03] Epoch 1 | Step 9170 | Loss: 1.4836 | LR: 1.00e-05 +[2026-04-25 20:53:05] Epoch 1 | Step 9180 | Loss: 1.4835 | LR: 1.00e-05 +[2026-04-25 20:53:08] Epoch 1 | Step 9190 | Loss: 1.4831 | LR: 1.00e-05 +[2026-04-25 20:53:10] Epoch 1 | Step 9200 | Loss: 1.4830 | LR: 1.00e-05 +[2026-04-25 20:53:13] Epoch 1 | Step 9210 | Loss: 1.4829 | LR: 1.00e-05 +[2026-04-25 20:53:16] Epoch 1 | Step 9220 | Loss: 1.4827 | LR: 1.00e-05 +[2026-04-25 20:53:18] Epoch 1 | Step 9230 | Loss: 1.4826 | LR: 1.00e-05 +[2026-04-25 20:53:21] Epoch 1 | Step 9240 | Loss: 1.4822 | LR: 1.00e-05 +[2026-04-25 20:53:23] Epoch 1 | Step 9250 | Loss: 1.4820 | LR: 1.00e-05 +[2026-04-25 20:53:26] Epoch 1 | Step 9260 | Loss: 1.4817 | LR: 1.00e-05 +[2026-04-25 20:53:29] Epoch 1 | Step 9270 | Loss: 1.4816 | LR: 1.00e-05 +[2026-04-25 20:53:31] Epoch 1 | Step 9280 | Loss: 1.4815 | LR: 1.00e-05 +[2026-04-25 20:53:34] Epoch 1 | Step 9290 | Loss: 1.4814 | LR: 1.00e-05 +[2026-04-25 20:53:36] Epoch 1 | Step 9300 | Loss: 1.4813 | LR: 1.00e-05 +[2026-04-25 20:53:39] Epoch 1 | Step 9310 | Loss: 1.4812 | LR: 1.00e-05 +[2026-04-25 20:53:41] Epoch 1 | Step 9320 | Loss: 1.4811 | LR: 1.00e-05 +[2026-04-25 20:53:44] Epoch 1 | Step 9330 | Loss: 1.4810 | LR: 1.00e-05 +[2026-04-25 20:53:46] Epoch 1 | Step 9340 | Loss: 1.4807 | LR: 1.00e-05 +[2026-04-25 20:53:49] Epoch 1 | Step 9350 | Loss: 1.4806 | LR: 1.00e-05 +[2026-04-25 20:53:51] Epoch 1 | Step 9360 | Loss: 1.4805 | LR: 1.00e-05 +[2026-04-25 20:53:54] Epoch 1 | Step 9370 | Loss: 1.4804 | LR: 1.00e-05 +[2026-04-25 20:53:56] Epoch 1 | Step 9380 | Loss: 1.4803 | LR: 1.00e-05 +[2026-04-25 20:53:59] Epoch 1 | Step 9390 | Loss: 1.4799 | LR: 1.00e-05 +[2026-04-25 20:54:01] Epoch 1 | Step 9400 | Loss: 1.4800 | LR: 1.00e-05 +[2026-04-25 20:54:04] Epoch 1 | Step 9410 | Loss: 1.4799 | LR: 1.00e-05 +[2026-04-25 20:54:07] Epoch 1 | Step 9420 | Loss: 1.4800 | LR: 1.00e-05 +[2026-04-25 20:54:09] Epoch 1 | Step 9430 | Loss: 1.4799 | LR: 1.00e-05 +[2026-04-25 20:54:12] Epoch 1 | Step 9440 | Loss: 1.4798 | LR: 1.00e-05 +[2026-04-25 20:54:14] Epoch 1 | Step 9450 | Loss: 1.4798 | LR: 1.00e-05 +[2026-04-25 20:54:17] Epoch 1 | Step 9460 | Loss: 1.4796 | LR: 1.00e-05 +[2026-04-25 20:54:19] Epoch 1 | Step 9470 | Loss: 1.4793 | LR: 1.00e-05 +[2026-04-25 20:54:22] Epoch 1 | Step 9480 | Loss: 1.4790 | LR: 1.00e-05 +[2026-04-25 20:54:24] Epoch 1 | Step 9490 | Loss: 1.4790 | LR: 1.00e-05 +[2026-04-25 20:54:27] Epoch 1 | Step 9500 | Loss: 1.4788 | LR: 1.00e-05 +[2026-04-25 20:54:30] Epoch 1 | Step 9510 | Loss: 1.4787 | LR: 1.00e-05 +[2026-04-25 20:54:32] Epoch 1 | Step 9520 | Loss: 1.4785 | LR: 1.00e-05 +[2026-04-25 20:54:35] Epoch 1 | Step 9530 | Loss: 1.4785 | LR: 1.00e-05 +[2026-04-25 20:54:37] Epoch 1 | Step 9540 | Loss: 1.4782 | LR: 1.00e-05 +[2026-04-25 20:54:40] Epoch 1 | Step 9550 | Loss: 1.4781 | LR: 1.00e-05 +[2026-04-25 20:54:42] Epoch 1 | Step 9560 | Loss: 1.4781 | LR: 1.00e-05 +[2026-04-25 20:54:45] Epoch 1 | Step 9570 | Loss: 1.4781 | LR: 1.00e-05 +[2026-04-25 20:54:47] Epoch 1 | Step 9580 | Loss: 1.4781 | LR: 1.00e-05 +[2026-04-25 20:54:50] Epoch 1 | Step 9590 | Loss: 1.4780 | LR: 1.00e-05 +[2026-04-25 20:54:53] Epoch 1 | Step 9600 | Loss: 1.4778 | LR: 1.00e-05 +[2026-04-25 20:54:56] Epoch 1 | Step 9610 | Loss: 1.4776 | LR: 1.00e-05 +[2026-04-25 20:54:58] Epoch 1 | Step 9620 | Loss: 1.4776 | LR: 1.00e-05 +[2026-04-25 20:55:01] Epoch 1 | Step 9630 | Loss: 1.4776 | LR: 1.00e-05 +[2026-04-25 20:55:04] Epoch 1 | Step 9640 | Loss: 1.4775 | LR: 1.00e-05 +[2026-04-25 20:55:07] Epoch 1 | Step 9650 | Loss: 1.4773 | LR: 1.00e-05 +[2026-04-25 20:55:09] Epoch 1 | Step 9660 | Loss: 1.4772 | LR: 1.00e-05 +[2026-04-25 20:55:12] Epoch 1 | Step 9670 | Loss: 1.4772 | LR: 1.00e-05 +[2026-04-25 20:55:14] Epoch 1 | Step 9680 | Loss: 1.4770 | LR: 1.00e-05 +[2026-04-25 20:55:17] Epoch 1 | Step 9690 | Loss: 1.4769 | LR: 1.00e-05 +[2026-04-25 20:55:19] Epoch 1 | Step 9700 | Loss: 1.4768 | LR: 1.00e-05 +[2026-04-25 20:55:22] Epoch 1 | Step 9710 | Loss: 1.4767 | LR: 1.00e-05 +[2026-04-25 20:55:24] Epoch 1 | Step 9720 | Loss: 1.4766 | LR: 1.00e-05 +[2026-04-25 20:55:27] Epoch 1 | Step 9730 | Loss: 1.4766 | LR: 1.00e-05 +[2026-04-25 20:55:29] Epoch 1 | Step 9740 | Loss: 1.4765 | LR: 1.00e-05 +[2026-04-25 20:55:32] Epoch 1 | Step 9750 | Loss: 1.4763 | LR: 1.00e-05 +[2026-04-25 20:55:34] Epoch 1 | Step 9760 | Loss: 1.4762 | LR: 1.00e-05 +[2026-04-25 20:55:37] Epoch 1 | Step 9770 | Loss: 1.4761 | LR: 1.00e-05 +[2026-04-25 20:55:39] Epoch 1 | Step 9780 | Loss: 1.4759 | LR: 1.00e-05 +[2026-04-25 20:55:42] Epoch 1 | Step 9790 | Loss: 1.4757 | LR: 1.00e-05 +[2026-04-25 20:55:45] Epoch 1 | Step 9800 | Loss: 1.4757 | LR: 1.00e-05 +[2026-04-25 20:55:47] Epoch 1 | Step 9810 | Loss: 1.4755 | LR: 1.00e-05 +[2026-04-25 20:55:50] Epoch 1 | Step 9820 | Loss: 1.4754 | LR: 1.00e-05 +[2026-04-25 20:55:53] Epoch 1 | Step 9830 | Loss: 1.4753 | LR: 1.00e-05 +[2026-04-25 20:55:55] Epoch 1 | Step 9840 | Loss: 1.4754 | LR: 1.00e-05 +[2026-04-25 20:55:58] Epoch 1 | Step 9850 | Loss: 1.4752 | LR: 1.00e-05 +[2026-04-25 20:56:00] Epoch 1 | Step 9860 | Loss: 1.4749 | LR: 1.00e-05 +[2026-04-25 20:56:03] Epoch 1 | Step 9870 | Loss: 1.4749 | LR: 1.00e-05 +[2026-04-25 20:56:05] Epoch 1 | Step 9880 | Loss: 1.4748 | LR: 1.00e-05 +[2026-04-25 20:56:08] Epoch 1 completed in 2548.63s | Loss: 1.4748 +[2026-04-25 20:56:08] +Training completed! +[2026-04-25 20:56:10] Final model: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4/model_final.pt \ No newline at end of file diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/requirements.txt b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..f040f697230340f8a88a6e7387f7e8983d11b547 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/requirements.txt @@ -0,0 +1,245 @@ +setuptools==78.1.1 +wheel==0.45.1 +pip==25.2 +webencodings==0.5.1 +triton==3.2.0 +pytz==2025.2 +pydub==0.25.1 +pure_eval==0.2.3 +ptyprocess==0.7.0 +nvidia-ml-py==13.590.48 +nvidia-cusparselt-cu12==0.6.2 +mpmath==1.3.0 +ipython-genutils==0.2.0 +fastjsonschema==2.21.2 +brotli==1.2.0 +antlr4-python3-runtime==4.9.3 +xxhash==3.6.0 +widgetsnbextension==4.0.14 +websocket-client==1.9.0 +webcolors==24.11.1 +wcwidth==0.2.14 +urllib3==2.5.0 +uri-template==1.3.0 +tzdata==2025.2 +typing_extensions==4.15.0 +types-python-dateutil==2.9.0.20251008 +traitlets==5.14.3 +tqdm==4.67.1 +tornado==6.5.2 +tomlkit==0.13.3 +tinycss2==1.4.0 +tabulate==0.9.0 +sympy==1.13.1 +soupsieve==2.8 +sniffio==1.3.1 +smmap==5.0.2 +six==1.17.0 +shellingham==1.5.4 +Send2Trash==1.8.3 +semantic-version==2.10.0 +safetensors==0.6.2 +rpds-py==0.27.1 +rfc3986-validator==0.1.1 +regex==2025.9.18 +pyzmq==27.1.0 +PyYAML==6.0.3 +python-multipart==0.0.22 +python-json-logger==4.0.0 +python-dotenv==1.2.1 +pyparsing==3.2.5 +PyJWT==2.8.0 +Pygments==2.19.2 +pycparser==2.23 +pyarrow==22.0.0 +psutil==7.1.0 +protobuf==6.33.4 +propcache==0.4.1 +prometheus_client==0.23.1 +portalocker==3.2.0 +platformdirs==4.5.0 +pillow==11.3.0 +pexpect==4.9.0 +pathspec==1.0.4 +parso==0.8.5 +pandocfilters==1.5.1 +packaging==25.0 +orjson==3.11.6 +opt_einsum==3.4.0 +nvidia-nvtx-cu12==12.4.127 +nvidia-nvjitlink-cu12==12.4.127 +nvidia-nccl-cu12==2.21.5 +nvidia-curand-cu12==10.3.5.147 +nvidia-cufile-cu12==1.13.1.3 +nvidia-cufft-cu12==11.2.1.3 +nvidia-cuda-runtime-cu12==12.4.127 +nvidia-cuda-nvrtc-cu12==12.4.127 +nvidia-cuda-cupti-cu12==12.4.127 +nvidia-cublas-cu12==12.4.5.8 +numpy==2.3.3 +ninja==1.13.0 +networkx==3.5 +nest-asyncio==1.6.0 +narwhals==2.15.0 +mypy_extensions==1.1.0 +multidict==6.7.0 +mistune==3.1.4 +mdurl==0.1.2 +MarkupSafe==3.0.3 +lxml==6.0.2 +librt==0.8.0 +lark==1.3.0 +kiwisolver==1.4.9 +jupyterlab_widgets==3.0.15 +jupyterlab_pygments==0.3.0 +jsonpointer==3.0.0 +json5==0.12.1 +itsdangerous==2.2.0 +idna==3.10 +hf-xet==1.1.10 +h11==0.16.0 +groovy==0.1.2 +fsspec==2025.9.0 +frozenlist==1.8.0 +fqdn==1.5.1 +fonttools==4.60.1 +filelock==3.19.1 +ffmpy==1.0.0 +executing==2.2.1 +einops==0.8.1 +dill==0.4.0 +defusedxml==0.7.1 +decorator==5.2.1 +debugpy==1.8.17 +dacite==1.9.2 +cycler==0.12.1 +comm==0.2.3 +colorama==0.4.6 +click==8.3.1 +charset-normalizer==3.4.3 +certifi==2025.10.5 +bleach==6.2.0 +babel==2.17.0 +attrs==25.4.0 +async-lru==2.0.5 +asttokens==3.0.0 +annotated-types==0.7.0 +annotated-doc==0.0.4 +aiohappyeyeballs==2.6.1 +aiofiles==24.1.0 +yarl==1.22.0 +uvicorn==0.40.0 +typing-inspection==0.4.2 +terminado==0.18.1 +stack-data==0.6.3 +sentry-sdk==2.50.0 +scipy==1.17.0 +sacrebleu==2.6.0 +rfc3987-syntax==1.1.0 +rfc3339-validator==0.1.4 +requests==2.32.5 +reportlab==4.4.9 +referencing==0.36.2 +python-dateutil==2.9.0.post0 +pydantic_core==2.41.5 +prompt_toolkit==3.0.52 +plotly==6.5.2 +pathlib2==2.3.7.post1 +orderedmultidict==1.0.2 +optree==0.17.0 +omegaconf==2.3.0 +nvidia-cusparse-cu12==12.3.1.170 +nvidia-cudnn-cu12==9.1.0.70 +mypy==1.19.1 +multiprocess==0.70.16 +matplotlib-inline==0.1.7 +markdown-it-py==4.0.0 +jupyter_core==5.8.1 +Jinja2==3.1.6 +jedi==0.19.2 +ipython_pygments_lexers==1.1.1 +httpcore==1.0.9 +gitdb==4.0.12 +ftfy==6.3.1 +contourpy==1.3.3 +cffi==2.0.0 +beautifulsoup4==4.14.2 +anyio==4.11.0 +aiosignal==1.4.0 +starlette==0.50.0 +rich==14.2.0 +pydantic==2.12.5 +pandas==2.3.3 +nvidia-cusolver-cu12==11.6.1.9 +matplotlib==3.10.7 +jupyter_server_terminals==0.5.3 +jupyter_client==8.6.3 +jsonschema-specifications==2025.9.1 +ipython==9.6.0 +hydra-core==1.3.2 +huggingface-hub==0.35.3 +httpx==0.28.1 +GitPython==3.1.46 +furl==2.1.4 +cryptography==46.0.4 +arrow==1.3.0 +argon2-cffi-bindings==25.1.0 +aiohttp==3.13.1 +wandb==0.24.0 +typer==0.21.1 +torch==2.6.0 +tokenizers==0.22.1 +seaborn==0.13.2 +safehttpx==0.1.7 +jsonschema==4.25.1 +joypy==0.2.6 +isoduration==20.11.0 +ipywidgets==8.1.7 +ipykernel==6.30.1 +gradio_client==2.0.3 +fastapi==0.128.0 +Authlib==1.6.6 +argon2-cffi==25.1.0 +transformers==4.57.6 +nbformat==5.10.4 +mlstm_kernels==2.0.2 +jupyter-console==6.6.3 +gradio==6.5.1 +datasets==4.3.0 +clearml==1.16.4 +accelerate==1.10.1 +xlstm==2.0.4 +nbclient==0.10.2 +jupyter-events==0.12.0 +trackio==0.15.0 +nbconvert==7.16.6 +jupyter_server==2.17.0 +notebook_shim==0.2.4 +jupyterlab_server==2.27.3 +jupyter-lsp==2.3.0 +nbclassic==1.3.3 +jupyterlab==4.4.9 +notebook==7.4.7 +jupyter_contrib_core==0.4.2 +jupyter==1.1.1 +jupyter_nbextensions_configurator==0.6.4 +causal-conv1d==1.5.0.post8 +flash_attn==2.7.4.post1 +mamba-ssm==2.2.4 +hnet==0.0.1 +autocommand==2.2.2 +backports.tarfile==1.2.0 +importlib_metadata==8.0.0 +inflect==7.3.1 +jaraco.collections==5.1.0 +jaraco.context==5.3.0 +jaraco.functools==4.0.1 +jaraco.text==3.12.1 +more-itertools==10.3.0 +packaging==24.2 +platformdirs==4.2.2 +tomli==2.0.1 +typeguard==4.3.0 +typing_extensions==4.12.2 +wheel==0.45.1 +zipp==3.19.2 diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-metadata.json b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..abe6c04213f94839d3fe40ffbcb565357d21504e --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-metadata.json @@ -0,0 +1 @@ +{"os": "Linux-5.4.0-176-generic-x86_64-with-glibc2.35", "python": "CPython 3.12.0", "started_at": "2026-04-25T20:13:33.215817Z", "args": ["tracking=wandb", "tracking.project=code-completion_lr-sweep", "tracking.run_name=pythia_1b_lr_1e-4", "training.lr=1e-4", "paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4", "model=pythia_1b", "data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full"], "program": "/workspace/byte-llms-code/code_completion_exp/train_pythia/train.py", "code_path": "code_completion_exp/train_pythia/train.py", "code_path_local": "train.py", "git": {"remote_url": "https://github.com/naryst/byte-llms-code.git", "commit": "f111e13281aa0dc58e24302edab5b0d5c2024586"}, "email": "nikita@local.ru", "root": "/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4", "host": "7504e518d24a", "executable": "/venv/bytellm/bin/python", "cpu_count": 64, "cpu_count_logical": 128, "gpu_type": "NVIDIA H100 80GB HBM3", "gpu_count": 4, "disk": {"/": {"total": "265214230528", "used": "104089939968"}}, "memory": {"total": "1081679683584"}, "gpu_nvidia": [{"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-b60cdcab-2033-2009-41de-be646c953a20"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-9982b420-4520-4238-c378-ec5a46015474"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f"}, {"name": "NVIDIA H100 80GB HBM3", "memory_total": "85520809984", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134"}], "cuda_version": "12.2", "writer_id": "4o5msocvznwsbtdeapabjlonbytfxwvk"} \ No newline at end of file diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-summary.json b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..027c6a5079fbb226170de12a8d883c52624fde0b --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/files/wandb-summary.json @@ -0,0 +1 @@ +{"_runtime": 2555, "train/lr": 1e-05, "train/epoch": 1, "train/step_time": 0.25381178855895997, "_timestamp": 1777150568.003006, "_step": 9880, "train/loss": 0.9175188094377518, "train/loss_avg": 1.474775589316417, "val/time": 3.78352689743042, "best/val_perplexity": 4.003811303413088, "best/step": 8000, "val/loss": 1.3637330517882393, "val/perplexity": 4.003811303413088, "best/val_loss": 1.3637330517882393, "epoch/time": 2548.628220796585, "epoch/loss": 1.4747612875254155} \ No newline at end of file diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-core.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-core.log new file mode 100644 index 0000000000000000000000000000000000000000..d45203d4abf65f19d48755552da95c35e66974f9 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-core.log @@ -0,0 +1,16 @@ +{"time":"2026-04-25T20:13:33.300487807Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpezwuy1zk/port-129801.txt","pid":129801,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false} +{"time":"2026-04-25T20:13:33.300868169Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":129801} +{"time":"2026-04-25T20:13:33.300876916Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-129801-129861-447022468/socket","Net":"unix"}} +{"time":"2026-04-25T20:13:33.489131596Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"} +{"time":"2026-04-25T20:13:33.51225187Z","level":"INFO","msg":"handleInformInit: received","streamId":"p8ozhgpm","id":"1(@)"} +{"time":"2026-04-25T20:13:34.125372456Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"p8ozhgpm","id":"1(@)"} +{"time":"2026-04-25T20:56:12.14256331Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"p8ozhgpm","id":"1(@)"} +{"time":"2026-04-25T20:56:12.143054649Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"p8ozhgpm","id":"1(@)"} +{"time":"2026-04-25T20:56:12.160045806Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"} +{"time":"2026-04-25T20:56:12.160068491Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"} +{"time":"2026-04-25T20:56:12.160074131Z","level":"INFO","msg":"server is shutting down"} +{"time":"2026-04-25T20:56:12.160078748Z","level":"INFO","msg":"connection: closing","id":"1(@)"} +{"time":"2026-04-25T20:56:12.160129552Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"} +{"time":"2026-04-25T20:56:12.16014455Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"} +{"time":"2026-04-25T20:56:12.160127343Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-129801-129861-447022468/socket","Net":"unix"}} +{"time":"2026-04-25T20:56:12.160152608Z","level":"INFO","msg":"server is closed"} diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-internal.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..cfcffa249b5b679b1a2090f4f5cb51ce0da411e6 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-internal.log @@ -0,0 +1,15 @@ +{"time":"2026-04-25T20:13:33.512352077Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T20:13:34.125183027Z","level":"INFO","msg":"stream: created new stream","id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125255934Z","level":"INFO","msg":"handler: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125364107Z","level":"INFO","msg":"stream: started","id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125374614Z","level":"INFO","msg":"writer: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.125380451Z","level":"INFO","msg":"sender: started","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:13:34.291689491Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-25T20:21:29.531100906Z","level":"ERROR","msg":"api: HTTP error","status":403,"method":"POST","url":"https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/p8ozhgpm/file_stream"} +{"time":"2026-04-25T20:21:29.531179064Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 403 Forbidden url=https://wandb.platun0v.ru/files/nikita/code-completion_lr-sweep/p8ozhgpm/file_stream: "} +{"time":"2026-04-25T20:56:12.137741653Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-25T20:56:12.139056718Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-25T20:56:12.142590311Z","level":"INFO","msg":"stream: closing","id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142600734Z","level":"INFO","msg":"handler: closed","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142693536Z","level":"INFO","msg":"sender: closed","stream_id":"p8ozhgpm"} +{"time":"2026-04-25T20:56:12.142707407Z","level":"INFO","msg":"stream: closed","id":"p8ozhgpm"} diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug.log b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..64e5cdb474041b621347916c4adba93911b07bd6 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug.log @@ -0,0 +1,24 @@ +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Configure stats pid to 129801 +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug.log +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/logs/debug-internal.log +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():844] calling init triggers +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 0.0001, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'pythia_1b_lr_1e-4', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-4'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}} +2026-04-25 20:13:33,217 INFO MainThread:129801 [wandb_init.py:init():892] starting backend +2026-04-25 20:13:33,489 INFO MainThread:129801 [wandb_init.py:init():895] sending inform_init request +2026-04-25 20:13:33,511 INFO MainThread:129801 [wandb_init.py:init():903] backend started and connected +2026-04-25 20:13:33,514 INFO MainThread:129801 [wandb_init.py:init():973] updated telemetry +2026-04-25 20:13:33,531 INFO MainThread:129801 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 20:13:34,288 INFO MainThread:129801 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 20:13:34,450 INFO MainThread:129801 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 20:13:34,453 INFO MainThread:129801 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-25 20:56:10,222 INFO MainThread:129801 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/p8ozhgpm +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_restore():2476] restore +2026-04-25 20:56:10,223 INFO MainThread:129801 [wandb_run.py:_restore():2482] restore done +2026-04-25 20:56:12,142 INFO MainThread:129801 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb new file mode 100644 index 0000000000000000000000000000000000000000..80a39577ee02fffd9c949a2a6302108696044a83 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fea7db4858a46fec2e270b0d9240f62348b97a0e96c0caa6ce9211097353a26 +size 1257139 diff --git a/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb.synced b/lr_sweep/pythia_1b_lr_1e-4/wandb/run-20260425_201333-p8ozhgpm/run-p8ozhgpm.wandb.synced new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/code/code_completion_exp/train_pythia/train.py b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/code/code_completion_exp/train_pythia/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a4739962b19b1d61085c8b55220470866db8aea1 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/code/code_completion_exp/train_pythia/train.py @@ -0,0 +1,606 @@ +""" +Training Pipeline для Pythia (decoder-only transformer) на задаче Code Completion. + +Конфигурация через Hydra + OmegaConf, логирование в Trackio. +Поддержка DDP через Accelerate для multi-GPU тренировки. + +Использование: + # Базовый запуск (single GPU) + python train.py + + # Multi-GPU с Accelerate + accelerate launch train.py + + # Multi-GPU с указанием количества GPU + accelerate launch --num_processes=4 train.py + + # Переопределение параметров через CLI + python train.py training.lr=1e-4 training.epochs=5 + + # Выбор другого конфига модели + python train.py model=pythia_160m + + # Multirun (sweep) + python train.py --multirun training.lr=1e-4,3e-4,1e-3 + + # Без логирования + python train.py tracking.enabled=false +""" + +import os +import math +import time +from pathlib import Path + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from datasets import load_from_disk + +import hydra +from hydra.core.hydra_config import HydraConfig +from omegaconf import DictConfig, OmegaConf +from transformers import ( + AutoTokenizer, + AutoModelForCausalLM, + AutoConfig, + PreTrainedTokenizerBase, +) +from accelerate import Accelerator +from accelerate.utils import set_seed as accelerate_set_seed + +# Ensure repo root is on sys.path (needed when running from subdirectory) +import sys +sys.path.insert(0, str(Path(__file__).resolve().parents[2])) + +# Shared training library +from training_lib.utils import AverageMeter, log_message +from training_lib.checkpointing import save_checkpoint, load_checkpoint +from training_lib.schedulers import get_lr_scheduler +from training_lib.tracking import init_tracking, log_metrics, finish_tracking +from training_lib.validation import run_validation + + +# ============================================================================ +# ДАННЫЕ +# ============================================================================ + + +class CodeCompletionCollator: + """Collate function для батчирования примеров code completion.""" + + def __init__( + self, + tokenizer: PreTrainedTokenizerBase, + max_context_len: int = 1024, + max_target_len: int = 256, + ): + self.tokenizer = tokenizer + self.max_context_len = max_context_len + self.max_target_len = max_target_len + self.pad_token_id = tokenizer.pad_token_id + + def __call__(self, batch: list[dict]) -> dict: + contexts = [item["context"] for item in batch] + targets = [item["target"] for item in batch] + + encoded_contexts = self.tokenizer( + contexts, + add_special_tokens=True, + truncation=True, + max_length=self.max_context_len, + return_tensors=None, + ) + encoded_targets = self.tokenizer( + targets, + add_special_tokens=False, + truncation=True, + max_length=self.max_target_len, + return_tensors=None, + ) + + input_ids_list = [] + context_lengths = [] + + for ctx_ids, tgt_ids in zip( + encoded_contexts["input_ids"], encoded_targets["input_ids"] + ): + tgt_ids = tgt_ids + [self.tokenizer.eos_token_id] + context_lengths.append(len(ctx_ids)) + input_ids_list.append(ctx_ids + tgt_ids) + + max_len = max(len(ids) for ids in input_ids_list) + + padded_input_ids = [] + attention_mask = [] + + for ids in input_ids_list: + padding_len = max_len - len(ids) + padded_input_ids.append(ids + [self.pad_token_id] * padding_len) + attention_mask.append([1] * len(ids) + [0] * padding_len) + + return { + "input_ids": torch.tensor(padded_input_ids, dtype=torch.long), + "attention_mask": torch.tensor(attention_mask, dtype=torch.long), + "context_lengths": torch.tensor(context_lengths, dtype=torch.long), + } + + +def create_dataloaders( + cfg: DictConfig, tokenizer: PreTrainedTokenizerBase +) -> dict[str, DataLoader]: + """Создание DataLoader'ов для train и validation.""" + dataset_dict = load_from_disk(cfg.data.path) + + collator = CodeCompletionCollator( + tokenizer=tokenizer, + max_context_len=cfg.data.max_context_len, + max_target_len=cfg.data.max_target_len, + ) + + dataloaders = {} + + if "train" in dataset_dict: + train_dataset = dataset_dict["train"] + max_train = cfg.data.get("max_train_samples", None) + if max_train is not None: + train_dataset = train_dataset.select(range(min(max_train, len(train_dataset)))) + dataloaders["train"] = DataLoader( + train_dataset, + batch_size=cfg.training.batch_size, + shuffle=True, + collate_fn=collator, + num_workers=cfg.data.num_workers, + pin_memory=cfg.data.pin_memory, + ) + + if "validation" in dataset_dict: + val_dataset = dataset_dict["validation"] + max_val = cfg.data.get("max_val_samples", None) + if max_val is not None: + val_dataset = val_dataset.select(range(min(max_val, len(val_dataset)))) + eval_batch_size = cfg.training.get("eval_batch_size", cfg.training.batch_size) + dataloaders["validation"] = DataLoader( + val_dataset, + batch_size=eval_batch_size, + shuffle=False, + collate_fn=collator, + num_workers=cfg.data.num_workers, + pin_memory=cfg.data.pin_memory, + ) + + return dataloaders + + + + +# ============================================================================ +# LOSS ФУНКЦИИ +# ============================================================================ + + +def compute_loss( + logits: torch.Tensor, + input_ids: torch.Tensor, + context_lengths: torch.Tensor, + attention_mask: torch.Tensor, +) -> dict: + """Вычисление loss для авторегрессионной модели.""" + batch_size, seq_len, vocab_size = logits.shape + + shift_logits = logits[:, :-1, :].contiguous() + shift_labels = input_ids[:, 1:].contiguous() + shift_mask = attention_mask[:, 1:].contiguous() + + target_mask = torch.zeros_like(shift_labels, dtype=torch.bool) + for i in range(batch_size): + ctx_len = context_lengths[i].item() + target_mask[i, ctx_len - 1 :] = True + + final_mask = target_mask & shift_mask.bool() + + if final_mask.sum() > 0: + loss = F.cross_entropy( + shift_logits[final_mask], shift_labels[final_mask], reduction="mean" + ) + else: + loss = torch.tensor(0.0, device=logits.device) + + return {"loss": loss} + + +def _pythia_forward_loss( + model: nn.Module, + batch: dict, + cfg: DictConfig, + accelerator: Accelerator, +) -> dict: + """Forward + loss for a plain HF causal LM (attention_mask= kwarg, .logits).""" + input_ids = batch["input_ids"] + attention_mask = batch["attention_mask"] + context_lengths = batch["context_lengths"] + output = model(input_ids, attention_mask=attention_mask) + return compute_loss(output.logits, input_ids, context_lengths, attention_mask) + + +# ============================================================================ +# PARAMETER GROUPING +# ============================================================================ + + +def group_params(model: nn.Module, weight_decay: float) -> list[dict]: + """Группировка параметров для optimizer.""" + decay_params = [] + no_decay_params = [] + + for name, param in model.named_parameters(): + if not param.requires_grad: + continue + + if "bias" in name or "LayerNorm" in name or "layernorm" in name: + no_decay_params.append(param) + else: + decay_params.append(param) + + return [ + {"params": decay_params, "weight_decay": weight_decay}, + {"params": no_decay_params, "weight_decay": 0.0}, + ] + + + + +# ============================================================================ +# TRAINING LOOP +# ============================================================================ + + +def train_epoch( + model: nn.Module, + dataloader: DataLoader, + optimizer: torch.optim.Optimizer, + scheduler, + cfg: DictConfig, + epoch: int, + global_step: int, + accelerator: Accelerator, + val_dataloader: DataLoader | None = None, + best_val_loss: float = float("inf"), +) -> tuple[int, float]: + """Один epoch тренировки. Возвращает (global_step, best_val_loss).""" + model.train() + + loss_meter = AverageMeter() + + optimizer.zero_grad() + accumulated_loss = 0.0 + accumulated_steps = 0 + + epoch_start_time = time.time() + step_start_time = time.time() + + for batch_idx, batch in enumerate(dataloader): + input_ids = batch["input_ids"] + attention_mask = batch["attention_mask"] + context_lengths = batch["context_lengths"] + + with accelerator.autocast(): + output = model(input_ids, attention_mask=attention_mask) + logits = output.logits + loss_dict = compute_loss( + logits, input_ids, context_lengths, attention_mask + ) + + loss = loss_dict["loss"] / cfg.training.gradient_accumulation_steps + accelerator.backward(loss) + + accumulated_loss += loss_dict["loss"].item() + accumulated_steps += 1 + + if accumulated_steps == cfg.training.gradient_accumulation_steps: + if cfg.training.max_grad_norm > 0: + accelerator.clip_grad_norm_( + model.parameters(), cfg.training.max_grad_norm + ) + + optimizer.step() + scheduler.step() + optimizer.zero_grad() + + avg_loss = accumulated_loss / cfg.training.gradient_accumulation_steps + loss_meter.update(avg_loss) + + global_step += 1 + + if global_step % cfg.logging.log_interval == 0: + step_time = time.time() - step_start_time + current_lr = scheduler.get_last_lr()[0] + + metrics = { + "train/loss": loss_meter.val, + "train/loss_avg": loss_meter.avg, + "train/lr": current_lr, + "train/epoch": epoch, + "train/step_time": step_time / cfg.logging.log_interval, + } + + log_metrics(metrics, step=global_step) + + log_message( + f"Epoch {epoch} | Step {global_step} | " + f"Loss: {loss_meter.avg:.4f} | " + f"LR: {current_lr:.2e}", + cfg, + accelerator, + ) + + step_start_time = time.time() + + if ( + cfg.logging.save_interval > 0 + and global_step % cfg.logging.save_interval == 0 + ): + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + eval_interval = cfg.logging.get("eval_interval", 0) + if ( + eval_interval > 0 + and val_dataloader is not None + and global_step % eval_interval == 0 + ): + val_metrics = run_validation( + model=model, + dataloader=val_dataloader, + cfg=cfg, + global_step=global_step, + accelerator=accelerator, + forward_loss_fn=_pythia_forward_loss, + ) + + if val_metrics["val/loss"] < best_val_loss: + best_val_loss = val_metrics["val/loss"] + if accelerator.is_main_process: + best_model_path = Path(cfg.paths.output_dir) / "model_best.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), best_model_path) + log_message( + f"New best model saved! Val loss: {best_val_loss:.4f}", + cfg, + accelerator + ) + + log_metrics( + { + "best/val_loss": best_val_loss, + "best/val_perplexity": val_metrics["val/perplexity"], + "best/step": global_step, + }, + step=global_step, + ) + + model.train() + + accumulated_loss = 0.0 + accumulated_steps = 0 + + epoch_time = time.time() - epoch_start_time + + log_message( + f"Epoch {epoch} completed in {epoch_time:.2f}s | " + f"Loss: {loss_meter.avg:.4f}", + cfg, + accelerator, + ) + + log_metrics({ + "epoch/loss": loss_meter.avg, + "epoch/time": epoch_time, + }) + + return global_step, best_val_loss + + +# ============================================================================ +# MAIN +# ============================================================================ + + +@hydra.main(version_base=None, config_path="configs", config_name="config") +def main(cfg: DictConfig): + """Главная функция тренировки с поддержкой DDP через Accelerate.""" + + # === Performance: Enable TF32 for faster matmuls on Ampere+ GPUs === + torch.set_float32_matmul_precision('high') + + # === Accelerator Setup === + mixed_precision = "bf16" if cfg.training.use_amp else "no" + + accelerator = Accelerator( + mixed_precision=mixed_precision, + gradient_accumulation_steps=cfg.training.gradient_accumulation_steps, + ) + + # === Setup === + accelerate_set_seed(cfg.seed) + + if cfg.paths.output_dir is None: + cfg.paths.output_dir = HydraConfig.get().runtime.output_dir + + OmegaConf.resolve(cfg) + + log_message(f"CUDA_VISIBLE_DEVICES: {os.environ.get('CUDA_VISIBLE_DEVICES', 'not set')}", cfg, accelerator) + log_message(f"Number of processes: {accelerator.num_processes}", cfg, accelerator) + log_message(f"Process index: {accelerator.process_index}", cfg, accelerator) + log_message(f"Mixed precision: {mixed_precision}", cfg, accelerator) + + log_message("=" * 60, cfg, accelerator) + log_message("Pythia Training Pipeline (Hydra + Trackio + Accelerate)", cfg, accelerator) + log_message("=" * 60, cfg, accelerator) + log_message(f"Config:\n{OmegaConf.to_yaml(cfg)}", cfg, accelerator) + + # === Trackio Init === + init_tracking(cfg, accelerator) + + # === Tokenizer === + log_message("Initializing tokenizer...", cfg, accelerator) + tokenizer = AutoTokenizer.from_pretrained(cfg.model.name) + + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + tokenizer.pad_token_id = tokenizer.eos_token_id + + # === Model === + log_message("Loading model...", cfg, accelerator) + + # Flash Attention 2 + torch_dtype = torch.bfloat16 if cfg.training.use_amp else torch.float32 + + if cfg.model.checkpoint_path: + model = AutoModelForCausalLM.from_pretrained( + cfg.model.name, + attn_implementation="flash_attention_2", + torch_dtype=torch_dtype, + ) + checkpoint = torch.load(cfg.model.checkpoint_path, map_location="cpu") + model.load_state_dict(checkpoint["model_state_dict"] if "model_state_dict" in checkpoint else checkpoint) + log_message(f"Loaded checkpoint: {cfg.model.checkpoint_path}", cfg, accelerator) + elif cfg.model.from_scratch: + config = AutoConfig.from_pretrained(cfg.model.name) + config._attn_implementation = "flash_attention_2" + model = AutoModelForCausalLM.from_config(config, torch_dtype=torch_dtype) + log_message(f"Initialized from scratch: {cfg.model.name}", cfg, accelerator) + else: + model = AutoModelForCausalLM.from_pretrained( + cfg.model.name, + attn_implementation="flash_attention_2", + torch_dtype=torch_dtype, + ) + log_message(f"Loaded pretrained: {cfg.model.name}", cfg, accelerator) + + model.train() + + # Log model info + total_params = sum(p.numel() for p in model.parameters()) + trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad) + log_message(f"Total params: {total_params:,}", cfg, accelerator) + log_message(f"Trainable params: {trainable_params:,}", cfg, accelerator) + + # === Data === + log_message("Creating dataloaders...", cfg, accelerator) + dataloaders = create_dataloaders(cfg, tokenizer) + + train_dataloader = dataloaders["train"] + val_dataloader = dataloaders.get("validation", None) + + log_message(f"Train dataset size: {len(train_dataloader.dataset)}", cfg, accelerator) + log_message(f"Train batches per epoch (before DDP split): {len(train_dataloader)}", cfg, accelerator) + + if val_dataloader: + log_message(f"Validation dataset size: {len(val_dataloader.dataset)}", cfg, accelerator) + log_message(f"Validation batches: {len(val_dataloader)}", cfg, accelerator) + else: + log_message("No validation dataset found", cfg, accelerator) + + # === Optimizer === + log_message("Creating optimizer...", cfg, accelerator) + param_groups = group_params(model, cfg.training.weight_decay) + + optimizer = torch.optim.AdamW( + param_groups, + lr=cfg.training.lr, + betas=tuple(cfg.training.betas), + eps=cfg.training.eps, + ) + + # === Scheduler === + steps_per_epoch = math.ceil( + len(train_dataloader) / accelerator.num_processes + ) + total_steps = ( + cfg.training.epochs + * steps_per_epoch + // cfg.training.gradient_accumulation_steps + ) + scheduler = get_lr_scheduler(optimizer, cfg, total_steps) + + log_message( + f"Total steps: {total_steps}, Steps per epoch: {steps_per_epoch}", + cfg, + accelerator + ) + + # === Accelerate Prepare === + log_message("Preparing model, optimizer, and dataloaders with Accelerate...", cfg, accelerator) + + if val_dataloader is not None: + model, optimizer, train_dataloader, val_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, val_dataloader, scheduler + ) + else: + model, optimizer, train_dataloader, scheduler = accelerator.prepare( + model, optimizer, train_dataloader, scheduler + ) + + log_message(f"Train batches per epoch (after DDP split): {len(train_dataloader)}", cfg, accelerator) + + # === Resume === + global_step = 0 + start_epoch = 1 + + if cfg.training.resume and cfg.training.resume_checkpoint: + global_step, start_epoch = load_checkpoint( + model, optimizer, scheduler, cfg.training.resume_checkpoint, cfg, accelerator + ) + start_epoch += 1 + + # === Training Loop === + log_message("Starting training...", cfg, accelerator) + + best_val_loss = float("inf") + + try: + for epoch in range(start_epoch, cfg.training.epochs + 1): + log_message(f"\n{'=' * 60}", cfg, accelerator) + log_message(f"EPOCH {epoch}/{cfg.training.epochs}", cfg, accelerator) + log_message(f"{'=' * 60}", cfg, accelerator) + + global_step, best_val_loss = train_epoch( + model=model, + dataloader=train_dataloader, + optimizer=optimizer, + scheduler=scheduler, + cfg=cfg, + epoch=epoch, + global_step=global_step, + accelerator=accelerator, + val_dataloader=val_dataloader, + best_val_loss=best_val_loss, + ) + + if cfg.logging.save_every_epoch: + save_checkpoint( + model, optimizer, scheduler, global_step, epoch, cfg, accelerator + ) + + except KeyboardInterrupt: + log_message("Training interrupted by user", cfg, accelerator) + save_checkpoint(model, optimizer, scheduler, global_step, epoch, cfg, accelerator) + + # === Final Save === + log_message("\nTraining completed!", cfg, accelerator) + + if accelerator.is_main_process: + final_model_path = Path(cfg.paths.output_dir) / "model_final.pt" + unwrapped_model = accelerator.unwrap_model(model) + torch.save(unwrapped_model.state_dict(), final_model_path) + log_message(f"Final model: {final_model_path}", cfg, accelerator) + + accelerator.wait_for_everyone() + finish_tracking() + + +if __name__ == "__main__": + main() diff --git a/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/config.yaml b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f80e65fa383c22f5bfc20f95a0a25237df0bbf13 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/config.yaml @@ -0,0 +1,146 @@ +_wandb: + value: + cli_version: 0.24.0 + code_path: code/code_completion_exp/train_pythia/train.py + e: + dmklf1i0rmj0ula04lyo02ubhqbt3jfa: + args: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=pythia_1b_lr_1e-5 + - training.lr=1e-5 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-5 + - model=pythia_1b + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + codePath: code_completion_exp/train_pythia/train.py + codePathLocal: train.py + cpu_count: 64 + cpu_count_logical: 128 + cudaVersion: "12.2" + disk: + /: + total: "265214230528" + used: "91343659008" + email: nikita@local.ru + executable: /venv/bytellm/bin/python + git: + commit: f111e13281aa0dc58e24302edab5b0d5c2024586 + remote: https://github.com/naryst/byte-llms-code.git + gpu: NVIDIA H100 80GB HBM3 + gpu_count: 4 + gpu_nvidia: + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-b60cdcab-2033-2009-41de-be646c953a20 + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-9982b420-4520-4238-c378-ec5a46015474 + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-e26ebaac-aaa6-3eed-17ab-a3dce303a76f + - architecture: Hopper + cudaCores: 16896 + memoryTotal: "85520809984" + name: NVIDIA H100 80GB HBM3 + uuid: GPU-9dfc6dba-0be6-4a10-1027-336cc0e65134 + host: 7504e518d24a + memory: + total: "1081679683584" + os: Linux-5.4.0-176-generic-x86_64-with-glibc2.35 + program: /workspace/byte-llms-code/code_completion_exp/train_pythia/train.py + python: CPython 3.12.0 + root: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-5 + startedAt: "2026-04-25T18:06:09.709909Z" + writerId: dmklf1i0rmj0ula04lyo02ubhqbt3jfa + m: [] + python_version: 3.12.0 + t: + "1": + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + "2": + - 1 + - 11 + - 49 + - 50 + - 51 + - 71 + - 105 + "3": + - 2 + - 13 + - 16 + - 61 + "4": 3.12.0 + "5": 0.24.0 + "6": 4.57.6 + "12": 0.24.0 + "13": linux-x86_64 +data: + value: + max_context_len: 4096 + max_target_len: 256 + max_train_samples: null + max_val_samples: 2000 + num_workers: 4 + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + pin_memory: true +device: + value: cuda +logging: + value: + eval_interval: 2000 + log_interval: 10 + save_every_epoch: false + save_interval: 0 +model: + value: + checkpoint_path: null + from_scratch: false + name: EleutherAI/pythia-1b +paths: + value: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-5 +seed: + value: 42 +tracking: + value: + backend: wandb + base_url: https://wandb.platun0v.ru + enabled: true + entity: null + local_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_1e-5 + project: code-completion_lr-sweep + run_name: pythia_1b_lr_1e-5 +training: + value: + batch_size: 4 + betas: + - 0.9 + - 0.95 + decay_ratio: 0.2 + epochs: 1 + eps: 1e-08 + eval_batch_size: 12 + gradient_accumulation_steps: 4 + lr: 1e-05 + lr_scheduler: wsd + max_grad_norm: 1 + min_lr_ratio: 0.1 + resume: false + resume_checkpoint: null + use_amp: true + warmup_ratio: 0.1 + warmup_steps: 100 + weight_decay: 0.1 diff --git a/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/wandb-summary.json b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a07e167f2435b3c2b251ccb3e8a6f8e566193318 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_1e-5/wandb/run-20260425_180609-3z5g26qd/files/wandb-summary.json @@ -0,0 +1 @@ +{"best/step":8000,"train/loss":0.7713669911026955,"best/val_loss":1.0157066516223408,"train/lr":1.0000000000000002e-06,"_timestamp":1.7771428882060647e+09,"train/loss_avg":1.039686610145958,"best/val_perplexity":2.835609638473858,"_step":9880,"train/epoch":1,"val/time":3.7534332275390625,"_wandb":{"runtime":2519},"epoch/time":2512.484726667404,"train/step_time":0.2500261068344116,"epoch/loss":1.039672339170334,"val/loss":1.0157066516223408,"_runtime":2519,"val/perplexity":2.835609638473858} \ No newline at end of file diff --git a/lr_sweep/pythia_1b_lr_2e-5/.hydra/config.yaml b/lr_sweep/pythia_1b_lr_2e-5/.hydra/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..71cef8dd018dad2102c290458ac86300d968b30d --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/.hydra/config.yaml @@ -0,0 +1,49 @@ +model: + name: EleutherAI/pythia-1b + checkpoint_path: null + from_scratch: false +training: + epochs: 1 + batch_size: 4 + eval_batch_size: 12 + gradient_accumulation_steps: 4 + lr: 2.0e-05 + weight_decay: 0.1 + betas: + - 0.9 + - 0.95 + eps: 1.0e-08 + lr_scheduler: wsd + warmup_ratio: 0.1 + decay_ratio: 0.2 + warmup_steps: 100 + min_lr_ratio: 0.1 + max_grad_norm: 1.0 + use_amp: true + resume: false + resume_checkpoint: null +data: + path: /workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + max_context_len: 4096 + max_target_len: 256 + num_workers: 4 + pin_memory: true + max_train_samples: null + max_val_samples: 2000 +logging: + log_interval: 10 + save_interval: 0 + eval_interval: 2000 + save_every_epoch: false +tracking: + enabled: true + backend: wandb + project: code-completion_lr-sweep + run_name: pythia_1b_lr_2e-5 + entity: null + base_url: https://wandb.platun0v.ru + local_dir: ${paths.output_dir} +paths: + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5 +seed: 42 +device: cuda diff --git a/lr_sweep/pythia_1b_lr_2e-5/.hydra/hydra.yaml b/lr_sweep/pythia_1b_lr_2e-5/.hydra/hydra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4f0c97aa2b1911809a3598496f5e73a7dadf8b35 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/.hydra/hydra.yaml @@ -0,0 +1,167 @@ +hydra: + run: + dir: ${paths.output_dir} + sweep: + dir: outputs/multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][HYDRA] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + loggers: + logging_example: + level: DEBUG + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: simple + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.runtime.output_dir}/${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: [] + callbacks: {} + output_subdir: .hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - tracking=wandb + - tracking.project=code-completion_lr-sweep + - tracking.run_name=pythia_1b_lr_2e-5 + - training.lr=2e-5 + - paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5 + - model=pythia_1b + - data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full + job: + name: train + chdir: false + override_dirname: data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full,model=pythia_1b,paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5,tracking.project=code-completion_lr-sweep,tracking.run_name=pythia_1b_lr_2e-5,tracking=wandb,training.lr=2e-5 + id: ??? + num: ??? + config_name: config + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /workspace/byte-llms-code/code_completion_exp/train_pythia + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /workspace/byte-llms-code/code_completion_exp/train_pythia/configs + schema: file + provider: main + - path: '' + schema: structured + provider: schema + output_dir: /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5 + choices: + paths: default + tracking: wandb + logging: default + data: default + training: default + model: pythia_1b + hydra/env: default + hydra/callbacks: null + hydra/job_logging: default + hydra/hydra_logging: default + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/lr_sweep/pythia_1b_lr_2e-5/.hydra/overrides.yaml b/lr_sweep/pythia_1b_lr_2e-5/.hydra/overrides.yaml new file mode 100644 index 0000000000000000000000000000000000000000..21e78b4e541b2bd03719d03371f3f13624f3e115 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/.hydra/overrides.yaml @@ -0,0 +1,7 @@ +- tracking=wandb +- tracking.project=code-completion_lr-sweep +- tracking.run_name=pythia_1b_lr_2e-5 +- training.lr=2e-5 +- paths.output_dir=/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5 +- model=pythia_1b +- data.path=/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full diff --git a/lr_sweep/pythia_1b_lr_2e-5/wandb/debug-internal.log b/lr_sweep/pythia_1b_lr_2e-5/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..60c34c55cab1b284f66ff2659efaac30991aa222 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/wandb/debug-internal.log @@ -0,0 +1,13 @@ +{"time":"2026-04-25T18:48:23.273198118Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"} +{"time":"2026-04-25T18:48:23.732205852Z","level":"INFO","msg":"stream: created new stream","id":"bhvwo83l"} +{"time":"2026-04-25T18:48:23.732250159Z","level":"INFO","msg":"handler: started","stream_id":"bhvwo83l"} +{"time":"2026-04-25T18:48:23.732362504Z","level":"INFO","msg":"stream: started","id":"bhvwo83l"} +{"time":"2026-04-25T18:48:23.732371188Z","level":"INFO","msg":"writer: started","stream_id":"bhvwo83l"} +{"time":"2026-04-25T18:48:23.7323745Z","level":"INFO","msg":"sender: started","stream_id":"bhvwo83l"} +{"time":"2026-04-25T18:48:23.86036147Z","level":"ERROR","msg":"git repo not found","error":"repository does not exist"} +{"time":"2026-04-25T19:30:34.127544149Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"} +{"time":"2026-04-25T19:30:34.287461914Z","level":"INFO","msg":"handler: operation stats","stats":{}} +{"time":"2026-04-25T19:30:34.290164857Z","level":"INFO","msg":"stream: closing","id":"bhvwo83l"} +{"time":"2026-04-25T19:30:34.29017794Z","level":"INFO","msg":"handler: closed","stream_id":"bhvwo83l"} +{"time":"2026-04-25T19:30:34.290285178Z","level":"INFO","msg":"sender: closed","stream_id":"bhvwo83l"} +{"time":"2026-04-25T19:30:34.290289857Z","level":"INFO","msg":"stream: closed","id":"bhvwo83l"} diff --git a/lr_sweep/pythia_1b_lr_2e-5/wandb/debug.log b/lr_sweep/pythia_1b_lr_2e-5/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..631ad1e71926de64c4e92f29b1b3f14fbfe83a5a --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/wandb/debug.log @@ -0,0 +1,24 @@ +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_setup.py:_flush():81] Configure stats pid to 87140 +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_184822-bhvwo83l/logs/debug.log +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_184822-bhvwo83l/logs/debug-internal.log +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_init.py:init():844] calling init triggers +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 2e-05, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True, 'max_train_samples': None, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 0, 'eval_interval': 2000, 'save_every_epoch': False}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'pythia_1b_lr_2e-5', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}} +2026-04-25 18:48:22,990 INFO MainThread:87140 [wandb_init.py:init():892] starting backend +2026-04-25 18:48:23,256 INFO MainThread:87140 [wandb_init.py:init():895] sending inform_init request +2026-04-25 18:48:23,271 INFO MainThread:87140 [wandb_init.py:init():903] backend started and connected +2026-04-25 18:48:23,275 INFO MainThread:87140 [wandb_init.py:init():973] updated telemetry +2026-04-25 18:48:23,290 INFO MainThread:87140 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 18:48:23,859 INFO MainThread:87140 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 18:48:24,017 INFO MainThread:87140 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 18:48:24,018 INFO MainThread:87140 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 18:48:24,018 INFO MainThread:87140 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 18:48:24,018 INFO MainThread:87140 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 18:48:24,020 INFO MainThread:87140 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-25 19:30:33,277 INFO MainThread:87140 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/bhvwo83l +2026-04-25 19:30:33,278 INFO MainThread:87140 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-25 19:30:33,278 INFO MainThread:87140 [wandb_run.py:_restore():2476] restore +2026-04-25 19:30:33,278 INFO MainThread:87140 [wandb_run.py:_restore():2482] restore done +2026-04-25 19:30:34,289 INFO MainThread:87140 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/logs/debug.log b/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/logs/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..1baea30278b0b2a368d49ef070d053dc58ce1264 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/logs/debug.log @@ -0,0 +1,24 @@ +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_setup.py:_flush():81] Current SDK version is 0.24.0 +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_setup.py:_flush():81] Configure stats pid to 56987 +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/logs/debug.log +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/logs/debug-internal.log +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_init.py:init():844] calling init triggers +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'model': {'name': 'EleutherAI/pythia-1b', 'checkpoint_path': None, 'from_scratch': False}, 'training': {'epochs': 1, 'batch_size': 4, 'eval_batch_size': 12, 'gradient_accumulation_steps': 4, 'lr': 2e-05, 'weight_decay': 0.1, 'betas': [0.9, 0.95], 'eps': 1e-08, 'lr_scheduler': 'wsd', 'warmup_ratio': 0.1, 'decay_ratio': 0.2, 'warmup_steps': 100, 'min_lr_ratio': 0.1, 'max_grad_norm': 1.0, 'use_amp': True, 'resume': False, 'resume_checkpoint': None}, 'data': {'path': '/workspace/byte-llms-code/code_completion_exp/datasets/data_V4_full', 'max_context_len': 4096, 'max_target_len': 256, 'num_workers': 4, 'pin_memory': True, 'max_train_samples': 20000, 'max_val_samples': 2000}, 'logging': {'log_interval': 10, 'save_interval': 3000, 'eval_interval': 1000, 'save_every_epoch': True}, 'tracking': {'enabled': True, 'backend': 'wandb', 'project': 'code-completion_lr-sweep', 'run_name': 'pythia_1b_lr_2e-5', 'entity': None, 'base_url': 'https://wandb.platun0v.ru', 'local_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5'}, 'paths': {'output_dir': '/workspace/byte-llms-code/outputs/lr_sweep/pythia_1b_lr_2e-5'}, 'seed': 42, 'device': 'cuda', '_wandb': {'code_path': 'code/code_completion_exp/train_pythia/train.py'}} +2026-04-25 17:54:39,295 INFO MainThread:56987 [wandb_init.py:init():892] starting backend +2026-04-25 17:54:39,565 INFO MainThread:56987 [wandb_init.py:init():895] sending inform_init request +2026-04-25 17:54:39,585 INFO MainThread:56987 [wandb_init.py:init():903] backend started and connected +2026-04-25 17:54:39,588 INFO MainThread:56987 [wandb_init.py:init():973] updated telemetry +2026-04-25 17:54:39,605 INFO MainThread:56987 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-04-25 17:54:40,109 INFO MainThread:56987 [wandb_init.py:init():1044] starting run threads in backend +2026-04-25 17:54:40,270 INFO MainThread:56987 [wandb_run.py:_console_start():2529] atexit reg +2026-04-25 17:54:40,270 INFO MainThread:56987 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-04-25 17:54:40,270 INFO MainThread:56987 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-04-25 17:54:40,270 INFO MainThread:56987 [wandb_run.py:_redirect():2469] Redirects installed. +2026-04-25 17:54:40,273 INFO MainThread:56987 [wandb_init.py:init():1084] run started, returning control to user process +2026-04-25 17:57:41,875 INFO MainThread:56987 [wandb_run.py:_finish():2295] finishing run nikita/code-completion_lr-sweep/8mll1jbb +2026-04-25 17:57:41,875 INFO MainThread:56987 [wandb_run.py:_atexit_cleanup():2494] got exitcode: 0 +2026-04-25 17:57:41,876 INFO MainThread:56987 [wandb_run.py:_restore():2476] restore +2026-04-25 17:57:41,876 INFO MainThread:56987 [wandb_run.py:_restore():2482] restore done +2026-04-25 17:57:42,532 INFO MainThread:56987 [wandb_run.py:_footer_sync_info():3870] logging synced files diff --git a/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/run-8mll1jbb.wandb b/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/run-8mll1jbb.wandb new file mode 100644 index 0000000000000000000000000000000000000000..2980ee9fe9da16a377cbee17de621203281cf6ea Binary files /dev/null and b/lr_sweep/pythia_1b_lr_2e-5/wandb/run-20260425_175439-8mll1jbb/run-8mll1jbb.wandb differ diff --git a/lr_sweep/pythia_1b_lr_5e-5/checkpoints/checkpoint_latest.pt b/lr_sweep/pythia_1b_lr_5e-5/checkpoints/checkpoint_latest.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a79d315809954bf4b509a92baa970e0a10dccf3 --- /dev/null +++ b/lr_sweep/pythia_1b_lr_5e-5/checkpoints/checkpoint_latest.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ce9557a7f71097c57f6f246687af89b7c5ea41d6d157a71e03bbf259cf1b3f +size 6070947222