diff --git "a/zjun028/nougat-latex-1026/nougat-base.log" "b/zjun028/nougat-latex-1026/nougat-base.log" new file mode 100644--- /dev/null +++ "b/zjun028/nougat-latex-1026/nougat-base.log" @@ -0,0 +1,3558 @@ +2023-10-25 17:19:59 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:21:19 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:21:21 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:29:57 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:30:07 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:30:09 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:30:10 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:30:10 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 17:30:11 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:30:11 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 17:30:11 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 17:30:11 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 17:34:01 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:34:12 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:34:14 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:34:15 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:34:15 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 17:34:15 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:34:15 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 17:34:15 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 17:34:15 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 17:38:44 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:38:55 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:38:57 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:38:57 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:38:57 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 17:38:58 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:38:58 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 17:38:58 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 17:38:58 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 17:56:13 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:56:25 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:56:27 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:56:27 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:56:27 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 17:56:28 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:56:28 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 17:56:28 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 17:56:28 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 17:57:59 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 17:58:10 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 17:58:12 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 17:58:13 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:58:13 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 17:58:13 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 17:58:13 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 17:58:13 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 17:58:13 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:04:36 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:04:47 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:04:49 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:04:49 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:04:49 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:04:50 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:04:50 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:04:50 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:04:50 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:05:58 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:06:08 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:06:10 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:06:11 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:06:11 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:06:11 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:06:11 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:06:11 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:06:11 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:07:47 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:07:58 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:08:00 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:08:01 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:08:01 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:08:02 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:08:02 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:08:02 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:08:02 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:08:56 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:09:07 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:09:09 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:09:10 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:09:10 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:09:10 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:09:10 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:09:10 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:09:10 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:10:16 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:10:27 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:10:30 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:10:30 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:10:30 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:10:31 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:10:31 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:10:31 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:10:31 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:12:43 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:12:54 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:12:56 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:12:56 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:12:56 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:12:57 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:12:57 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:12:57 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:12:57 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:15:48 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:15:59 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:16:01 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:16:02 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:16:02 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:16:02 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:16:02 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:16:02 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:16:02 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:17:32 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:17:43 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:17:45 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:17:46 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:17:46 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:17:46 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:17:46 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:17:46 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:17:46 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:20:48 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:20:59 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:21:01 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:21:02 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:21:02 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:21:02 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:21:02 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:21:02 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:21:02 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:23:42 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:23:53 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:23:55 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:23:56 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:23:56 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:23:56 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:23:56 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:23:56 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:23:56 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:24:43 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:24:53 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:24:55 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:24:55 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:24:55 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:24:56 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:24:56 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:24:56 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:24:56 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:25:30 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:25:40 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:25:42 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:25:43 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:25:43 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:25:43 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:25:43 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:25:43 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:25:43 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:28:45 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:28:56 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:28:58 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:28:58 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:28:58 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:28:59 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:28:59 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:28:59 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:28:59 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:31:32 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:31:43 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:31:45 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:31:46 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:31:46 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:31:46 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:31:46 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:31:47 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:31:47 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:36:09 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:36:20 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:36:22 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:36:22 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:36:22 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:36:23 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:36:23 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:36:23 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:36:23 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:40:42 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:40:52 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:40:54 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:40:55 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:40:55 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:40:55 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:40:55 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:40:55 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:40:55 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:46:23 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:46:33 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:46:35 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:46:36 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:46:36 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:46:36 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:46:36 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:46:36 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:46:36 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:47:37 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:47:47 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:47:49 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:47:49 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:47:49 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:47:50 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:47:50 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:47:50 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:47:50 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 18:56:53 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 18:57:03 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 18:57:05 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 18:57:06 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:57:06 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 18:57:07 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 18:57:07 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 18:57:07 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 18:57:07 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 19:01:00 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 19:01:10 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 19:01:12 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 19:01:12 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:01:12 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 19:01:13 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:01:13 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 19:01:13 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 19:01:13 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 19:02:22 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 19:02:32 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 19:02:34 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 19:02:34 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:02:34 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 19:02:35 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:02:35 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 19:02:35 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 19:02:35 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 19:04:13 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 19:04:23 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 19:04:26 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 19:04:26 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:04:26 INFO root donut_experiment.py:179 - success init train data loader len:19 +2023-10-25 19:04:27 INFO root donut_experiment.py:226 - use data loader with batch_size:32,num_workers:10 +2023-10-25 19:04:27 INFO root donut_experiment.py:192 - success init eval data loader len:2 +2023-10-25 19:04:27 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:380, gradient_accumulator:1 +2023-10-25 19:04:27 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:19 +2023-10-25 19:05:48 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 19:05:59 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 19:06:01 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 19:06:01 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-25 19:06:01 INFO root donut_experiment.py:179 - success init train data loader len:293 +2023-10-25 19:06:02 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-25 19:06:02 INFO root donut_experiment.py:192 - success init eval data loader len:18 +2023-10-25 19:06:02 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:5860, gradient_accumulator:1 +2023-10-25 19:06:02 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:293 +2023-10-25 19:06:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 20, lr:8.000000e-07, step_mean_loss:1.0334564447402954, average_loss:0.5413098052144051), time, (train_step_time: 0.39210s, train_average_time: 0.54199s);(grad_norm_mean: 6.70255, grad_norm_step: 8.21541) +2023-10-25 19:06:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 40, lr:1.600000e-06, step_mean_loss:0.3707965016365051, average_loss:0.5345996044576168), time, (train_step_time: 0.39695s, train_average_time: 0.46896s);(grad_norm_mean: inf, grad_norm_step: 5.75765) +2023-10-25 19:06:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 60, lr:2.400000e-06, step_mean_loss:0.30842819809913635, average_loss:0.5048427758117516), time, (train_step_time: 0.41430s, train_average_time: 0.44559s);(grad_norm_mean: inf, grad_norm_step: 3.31236) +2023-10-25 19:07:57 INFO root base_experiment.py:174 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-25 19:08:07 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-25 19:08:09 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-25 19:08:10 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-25 19:08:10 INFO root donut_experiment.py:179 - success init train data loader len:293 +2023-10-25 19:08:10 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-25 19:08:10 INFO root donut_experiment.py:192 - success init eval data loader len:18 +2023-10-25 19:08:10 INFO root base_experiment.py:286 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:5860, gradient_accumulator:1 +2023-10-25 19:08:10 INFO root base_experiment.py:217 - current trainer epochs:20, train_dataset_len:586, data_loader_len:293 +2023-10-25 19:08:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 20, lr:8.000000e-07, step_mean_loss:0.5698135495185852, average_loss:0.6431094348430634), time, (train_step_time: 0.41107s, train_average_time: 0.48006s);(grad_norm_mean: inf, grad_norm_step: 8.88052) +2023-10-25 19:08:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 40, lr:1.600000e-06, step_mean_loss:0.6910229921340942, average_loss:0.5465618316084146), time, (train_step_time: 0.38905s, train_average_time: 0.44725s);(grad_norm_mean: inf, grad_norm_step: 11.43882) +2023-10-25 19:08:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 60, lr:2.400000e-06, step_mean_loss:0.3023693561553955, average_loss:0.5165627491970857), time, (train_step_time: 0.41141s, train_average_time: 0.43441s);(grad_norm_mean: inf, grad_norm_step: 4.02926) +2023-10-25 19:08:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 80, lr:3.200000e-06, step_mean_loss:0.3593837022781372, average_loss:0.45938945887610316), time, (train_step_time: 0.38618s, train_average_time: 0.42599s);(grad_norm_mean: inf, grad_norm_step: 3.77035) +2023-10-25 19:08:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 100, lr:4.000000e-06, step_mean_loss:0.060860950499773026, average_loss:0.4274003865197301), time, (train_step_time: 0.44262s, train_average_time: 0.42138s);(grad_norm_mean: inf, grad_norm_step: 1.79640) +2023-10-25 19:09:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 120, lr:4.800000e-06, step_mean_loss:0.5032721757888794, average_loss:0.40917447237297894), time, (train_step_time: 0.38142s, train_average_time: 0.41731s);(grad_norm_mean: inf, grad_norm_step: 5.30051) +2023-10-25 19:09:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 140, lr:5.600000e-06, step_mean_loss:0.10092995315790176, average_loss:0.38182895627937147), time, (train_step_time: 0.38085s, train_average_time: 0.41469s);(grad_norm_mean: inf, grad_norm_step: 3.13964) +2023-10-25 19:09:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 160, lr:6.400000e-06, step_mean_loss:0.5731406211853027, average_loss:0.36034257798455654), time, (train_step_time: 0.41812s, train_average_time: 0.41386s);(grad_norm_mean: inf, grad_norm_step: 9.64285) +2023-10-25 19:09:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 180, lr:7.200000e-06, step_mean_loss:0.24061137437820435, average_loss:0.3412759664985869), time, (train_step_time: 0.40836s, train_average_time: 0.41165s);(grad_norm_mean: inf, grad_norm_step: 4.93475) +2023-10-25 19:09:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 200, lr:8.000000e-06, step_mean_loss:0.1538688838481903, average_loss:0.32199137458577753), time, (train_step_time: 0.41313s, train_average_time: 0.41011s);(grad_norm_mean: inf, grad_norm_step: 3.34540) +2023-10-25 19:09:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 220, lr:8.800000e-06, step_mean_loss:0.27010321617126465, average_loss:0.30953311148014934), time, (train_step_time: 0.38492s, train_average_time: 0.40942s);(grad_norm_mean: inf, grad_norm_step: 5.15811) +2023-10-25 19:09:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 240, lr:9.600000e-06, step_mean_loss:0.12352931499481201, average_loss:0.29449842849280683), time, (train_step_time: 0.40345s, train_average_time: 0.40831s);(grad_norm_mean: inf, grad_norm_step: 7.61944) +2023-10-25 19:09:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 260, lr:1.040000e-05, step_mean_loss:0.07055763155221939, average_loss:0.28493862841994716), time, (train_step_time: 0.40133s, train_average_time: 0.40717s);(grad_norm_mean: inf, grad_norm_step: 4.13309) +2023-10-25 19:10:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 280, lr:1.120000e-05, step_mean_loss:0.08951990306377411, average_loss:0.28180453432391267), time, (train_step_time: 0.42204s, train_average_time: 0.40556s);(grad_norm_mean: inf, grad_norm_step: 1.76893) +2023-10-25 19:10:09 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 0, steps: 293); +2023-10-25 19:10:14 INFO root donut_experiment.py:62 - batch inference time:4.35906195640564 s +2023-10-25 19:10:16 INFO root donut_experiment.py:62 - batch inference time:2.4038665294647217 s +2023-10-25 19:10:18 INFO root donut_experiment.py:62 - batch inference time:1.4039843082427979 s +2023-10-25 19:10:19 INFO root donut_experiment.py:62 - batch inference time:0.8308486938476562 s +2023-10-25 19:10:20 INFO root donut_experiment.py:62 - batch inference time:1.9063847064971924 s +2023-10-25 19:10:21 INFO root donut_experiment.py:62 - batch inference time:1.0095303058624268 s +2023-10-25 19:10:22 INFO root donut_experiment.py:62 - batch inference time:0.7063772678375244 s +2023-10-25 19:10:24 INFO root donut_experiment.py:62 - batch inference time:1.3783416748046875 s +2023-10-25 19:10:25 INFO root donut_experiment.py:62 - batch inference time:1.3877766132354736 s +2023-10-25 19:10:27 INFO root donut_experiment.py:62 - batch inference time:2.035304069519043 s +2023-10-25 19:10:38 INFO root donut_experiment.py:62 - batch inference time:11.269998788833618 s +2023-10-25 19:10:39 INFO root donut_experiment.py:62 - batch inference time:0.6236717700958252 s +2023-10-25 19:10:41 INFO root donut_experiment.py:62 - batch inference time:1.9204394817352295 s +2023-10-25 19:10:42 INFO root donut_experiment.py:62 - batch inference time:1.5958852767944336 s +2023-10-25 19:10:44 INFO root donut_experiment.py:62 - batch inference time:1.9440078735351562 s +2023-10-25 19:10:46 INFO root donut_experiment.py:62 - batch inference time:1.3218069076538086 s +2023-10-25 19:10:57 INFO root donut_experiment.py:62 - batch inference time:11.254744529724121 s +2023-10-25 19:10:59 INFO root donut_experiment.py:62 - batch inference time:1.5570850372314453 s +2023-10-25 19:10:59 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:10:59 INFO root donut_experiment.py:72 - token_acc: 0.26607298001737617; edit_dis: 0.9427380473490601 +2023-10-25 19:11:03 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch0_step293_lr1.172000e-05_avg_loss0.27628_token_acc0.26607_edit_dis0.94274.pth +2023-10-25 19:11:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 300, lr:1.200000e-05, step_mean_loss:0.20369330048561096, average_loss:0.2740019034811606), time, (train_step_time: 0.37586s, train_average_time: 0.40401s);(grad_norm_mean: inf, grad_norm_step: 3.97689) +2023-10-25 19:11:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 320, lr:1.280000e-05, step_mean_loss:0.14041785895824432, average_loss:0.26674913979077247), time, (train_step_time: 0.38529s, train_average_time: 0.40352s);(grad_norm_mean: inf, grad_norm_step: 3.64730) +2023-10-25 19:11:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 340, lr:1.360000e-05, step_mean_loss:0.1458083838224411, average_loss:0.26072923101156076), time, (train_step_time: 0.41577s, train_average_time: 0.40303s);(grad_norm_mean: inf, grad_norm_step: 3.27873) +2023-10-25 19:11:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 360, lr:1.440000e-05, step_mean_loss:0.12572769820690155, average_loss:0.2536622075829655), time, (train_step_time: 0.41403s, train_average_time: 0.40262s);(grad_norm_mean: inf, grad_norm_step: 2.60891) +2023-10-25 19:11:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 380, lr:1.520000e-05, step_mean_loss:0.05231565237045288, average_loss:0.24589245798752496), time, (train_step_time: 0.41628s, train_average_time: 0.40265s);(grad_norm_mean: inf, grad_norm_step: 3.88378) +2023-10-25 19:11:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 400, lr:1.600000e-05, step_mean_loss:0.1942630410194397, average_loss:0.2410282031772658), time, (train_step_time: 0.39386s, train_average_time: 0.40239s);(grad_norm_mean: inf, grad_norm_step: 3.49318) +2023-10-25 19:11:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 420, lr:1.680000e-05, step_mean_loss:0.07814240455627441, average_loss:0.23428522862405293), time, (train_step_time: 0.44164s, train_average_time: 0.40203s);(grad_norm_mean: inf, grad_norm_step: 1.67594) +2023-10-25 19:12:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 440, lr:1.760000e-05, step_mean_loss:0.12508292496204376, average_loss:0.22885475673670458), time, (train_step_time: 0.38937s, train_average_time: 0.40230s);(grad_norm_mean: inf, grad_norm_step: 4.00407) +2023-10-25 19:12:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 460, lr:1.840000e-05, step_mean_loss:0.2300819605588913, average_loss:0.22398561501227643), time, (train_step_time: 0.37896s, train_average_time: 0.40191s);(grad_norm_mean: inf, grad_norm_step: 8.63207) +2023-10-25 19:12:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 480, lr:1.920000e-05, step_mean_loss:0.11832627654075623, average_loss:0.22028162749144636), time, (train_step_time: 0.38681s, train_average_time: 0.40152s);(grad_norm_mean: inf, grad_norm_step: 3.92948) +2023-10-25 19:12:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 500, lr:2.000000e-05, step_mean_loss:0.05610547587275505, average_loss:0.21634643477620558), time, (train_step_time: 0.40523s, train_average_time: 0.40120s);(grad_norm_mean: inf, grad_norm_step: 1.88992) +2023-10-25 19:12:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 520, lr:1.999931e-05, step_mean_loss:0.5237329006195068, average_loss:0.2128531501704576), time, (train_step_time: 0.36462s, train_average_time: 0.40097s);(grad_norm_mean: inf, grad_norm_step: 17.06282) +2023-10-25 19:12:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 540, lr:1.999725e-05, step_mean_loss:0.045913852751255035, average_loss:0.20893853295603076), time, (train_step_time: 0.38512s, train_average_time: 0.40064s);(grad_norm_mean: inf, grad_norm_step: 1.80129) +2023-10-25 19:12:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 560, lr:1.999382e-05, step_mean_loss:0.23505675792694092, average_loss:0.20806694351111737), time, (train_step_time: 0.41110s, train_average_time: 0.40043s);(grad_norm_mean: inf, grad_norm_step: 2.92687) +2023-10-25 19:12:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 580, lr:1.998901e-05, step_mean_loss:0.07230100780725479, average_loss:0.20614770907820362), time, (train_step_time: 0.38010s, train_average_time: 0.39992s);(grad_norm_mean: inf, grad_norm_step: 4.36508) +2023-10-25 19:12:59 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 1, steps: 586); +2023-10-25 19:13:02 INFO root donut_experiment.py:62 - batch inference time:2.545872688293457 s +2023-10-25 19:13:04 INFO root donut_experiment.py:62 - batch inference time:1.8464503288269043 s +2023-10-25 19:13:05 INFO root donut_experiment.py:62 - batch inference time:1.1047704219818115 s +2023-10-25 19:13:06 INFO root donut_experiment.py:62 - batch inference time:0.7328519821166992 s +2023-10-25 19:13:08 INFO root donut_experiment.py:62 - batch inference time:1.4847254753112793 s +2023-10-25 19:13:08 INFO root donut_experiment.py:62 - batch inference time:0.8210494518280029 s +2023-10-25 19:13:09 INFO root donut_experiment.py:62 - batch inference time:0.5508003234863281 s +2023-10-25 19:13:18 INFO root donut_experiment.py:62 - batch inference time:8.688187599182129 s +2023-10-25 19:13:19 INFO root donut_experiment.py:62 - batch inference time:1.0374343395233154 s +2023-10-25 19:13:20 INFO root donut_experiment.py:62 - batch inference time:1.553391695022583 s +2023-10-25 19:13:22 INFO root donut_experiment.py:62 - batch inference time:1.562894582748413 s +2023-10-25 19:13:22 INFO root donut_experiment.py:62 - batch inference time:0.44373059272766113 s +2023-10-25 19:13:24 INFO root donut_experiment.py:62 - batch inference time:1.4760799407958984 s +2023-10-25 19:13:25 INFO root donut_experiment.py:62 - batch inference time:1.119771957397461 s +2023-10-25 19:13:27 INFO root donut_experiment.py:62 - batch inference time:1.74595046043396 s +2023-10-25 19:13:28 INFO root donut_experiment.py:62 - batch inference time:1.0358765125274658 s +2023-10-25 19:13:29 INFO root donut_experiment.py:62 - batch inference time:1.5410354137420654 s +2023-10-25 19:13:30 INFO root donut_experiment.py:62 - batch inference time:1.223635196685791 s +2023-10-25 19:13:31 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:13:31 INFO root donut_experiment.py:72 - token_acc: 0.31311351623458344; edit_dis: 0.6606059747637391 +2023-10-25 19:13:35 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch1_step586_lr1.998730e-05_avg_loss0.20468_token_acc0.31311_edit_dis0.66061.pth +2023-10-25 19:13:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 600, lr:1.998283e-05, step_mean_loss:0.1364688277244568, average_loss:0.20171789244477017), time, (train_step_time: 0.47851s, train_average_time: 0.39994s);(grad_norm_mean: inf, grad_norm_step: 2.54247) +2023-10-25 19:13:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 620, lr:1.997528e-05, step_mean_loss:0.04410626366734505, average_loss:0.19763770417588192), time, (train_step_time: 0.38732s, train_average_time: 0.39995s);(grad_norm_mean: inf, grad_norm_step: 4.55007) +2023-10-25 19:13:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 640, lr:1.996635e-05, step_mean_loss:0.1396440863609314, average_loss:0.19386872248178405), time, (train_step_time: 0.37601s, train_average_time: 0.39961s);(grad_norm_mean: inf, grad_norm_step: 7.22559) +2023-10-25 19:14:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 660, lr:1.995606e-05, step_mean_loss:0.01696319505572319, average_loss:0.1906079357310029), time, (train_step_time: 0.37691s, train_average_time: 0.39917s);(grad_norm_mean: inf, grad_norm_step: 1.09223) +2023-10-25 19:14:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 680, lr:1.994440e-05, step_mean_loss:0.011289330199360847, average_loss:0.18718311569194637), time, (train_step_time: 0.38143s, train_average_time: 0.39915s);(grad_norm_mean: inf, grad_norm_step: 1.01063) +2023-10-25 19:14:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 700, lr:1.993137e-05, step_mean_loss:0.04243778437376022, average_loss:0.18346603733919828), time, (train_step_time: 0.46801s, train_average_time: 0.39903s);(grad_norm_mean: inf, grad_norm_step: 1.15992) +2023-10-25 19:14:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 720, lr:1.991698e-05, step_mean_loss:0.09436096251010895, average_loss:0.18120401320597415), time, (train_step_time: 0.37141s, train_average_time: 0.39919s);(grad_norm_mean: inf, grad_norm_step: 3.03640) +2023-10-25 19:14:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 740, lr:1.990123e-05, step_mean_loss:0.06484244018793106, average_loss:0.17935948006940905), time, (train_step_time: 0.38749s, train_average_time: 0.39917s);(grad_norm_mean: inf, grad_norm_step: 1.82563) +2023-10-25 19:14:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 760, lr:1.988411e-05, step_mean_loss:0.0738377496600151, average_loss:0.17719496617026284), time, (train_step_time: 0.38379s, train_average_time: 0.39890s);(grad_norm_mean: inf, grad_norm_step: 18.12372) +2023-10-25 19:14:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 780, lr:1.986564e-05, step_mean_loss:0.17264814674854279, average_loss:0.17548722930592842), time, (train_step_time: 0.37588s, train_average_time: 0.39872s);(grad_norm_mean: inf, grad_norm_step: 7.88195) +2023-10-25 19:15:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 800, lr:1.984581e-05, step_mean_loss:0.0066395048052072525, average_loss:0.1728152085270267), time, (train_step_time: 0.38700s, train_average_time: 0.39847s);(grad_norm_mean: inf, grad_norm_step: 0.57805) +2023-10-25 19:15:08 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 820, lr:1.982463e-05, step_mean_loss:0.0680861696600914, average_loss:0.17073375950453848), time, (train_step_time: 0.37575s, train_average_time: 0.39831s);(grad_norm_mean: inf, grad_norm_step: 4.93978) +2023-10-25 19:15:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 840, lr:1.980209e-05, step_mean_loss:0.05889544636011124, average_loss:0.16853030493283377), time, (train_step_time: 0.40100s, train_average_time: 0.39822s);(grad_norm_mean: inf, grad_norm_step: 2.23503) +2023-10-25 19:15:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 860, lr:1.977821e-05, step_mean_loss:0.05590119585394859, average_loss:0.16704621310183387), time, (train_step_time: 0.35411s, train_average_time: 0.39827s);(grad_norm_mean: inf, grad_norm_step: 1.36591) +2023-10-25 19:15:31 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 2, steps: 879); +2023-10-25 19:15:34 INFO root donut_experiment.py:62 - batch inference time:2.3739237785339355 s +2023-10-25 19:15:36 INFO root donut_experiment.py:62 - batch inference time:1.9019427299499512 s +2023-10-25 19:15:37 INFO root donut_experiment.py:62 - batch inference time:1.0821523666381836 s +2023-10-25 19:15:37 INFO root donut_experiment.py:62 - batch inference time:0.7423429489135742 s +2023-10-25 19:15:39 INFO root donut_experiment.py:62 - batch inference time:1.5198695659637451 s +2023-10-25 19:15:40 INFO root donut_experiment.py:62 - batch inference time:0.8010854721069336 s +2023-10-25 19:15:40 INFO root donut_experiment.py:62 - batch inference time:0.5641274452209473 s +2023-10-25 19:15:41 INFO root donut_experiment.py:62 - batch inference time:1.024557113647461 s +2023-10-25 19:15:42 INFO root donut_experiment.py:62 - batch inference time:1.1002240180969238 s +2023-10-25 19:15:44 INFO root donut_experiment.py:62 - batch inference time:1.5821678638458252 s +2023-10-25 19:15:46 INFO root donut_experiment.py:62 - batch inference time:1.4165761470794678 s +2023-10-25 19:15:46 INFO root donut_experiment.py:62 - batch inference time:0.45760226249694824 s +2023-10-25 19:15:47 INFO root donut_experiment.py:62 - batch inference time:1.5066308975219727 s +2023-10-25 19:15:49 INFO root donut_experiment.py:62 - batch inference time:1.2178020477294922 s +2023-10-25 19:15:50 INFO root donut_experiment.py:62 - batch inference time:1.4668407440185547 s +2023-10-25 19:15:51 INFO root donut_experiment.py:62 - batch inference time:1.0107414722442627 s +2023-10-25 19:15:53 INFO root donut_experiment.py:62 - batch inference time:1.3268928527832031 s +2023-10-25 19:15:54 INFO root donut_experiment.py:62 - batch inference time:1.2715528011322021 s +2023-10-25 19:15:54 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:15:54 INFO root donut_experiment.py:72 - token_acc: 0.3857772183763373; edit_dis: 0.12260622182161694 +2023-10-25 19:15:58 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch2_step879_lr1.975428e-05_avg_loss0.16531_token_acc0.38578_edit_dis0.12261.pth +2023-10-25 19:15:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 880, lr:1.975299e-05, step_mean_loss:0.03630199655890465, average_loss:0.165159034159098), time, (train_step_time: 0.49019s, train_average_time: 0.39770s);(grad_norm_mean: inf, grad_norm_step: 1.63287) +2023-10-25 19:16:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 900, lr:1.972643e-05, step_mean_loss:0.10410097986459732, average_loss:0.16263495183705043), time, (train_step_time: 0.38899s, train_average_time: 0.39779s);(grad_norm_mean: inf, grad_norm_step: 4.15950) +2023-10-25 19:16:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 920, lr:1.969853e-05, step_mean_loss:0.013197394087910652, average_loss:0.16035876192567547), time, (train_step_time: 0.39644s, train_average_time: 0.39765s);(grad_norm_mean: inf, grad_norm_step: 0.51409) +2023-10-25 19:16:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 940, lr:1.966930e-05, step_mean_loss:0.056872278451919556, average_loss:0.15790438670120102), time, (train_step_time: 0.39446s, train_average_time: 0.39749s);(grad_norm_mean: inf, grad_norm_step: 2.05834) +2023-10-25 19:16:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 960, lr:1.963874e-05, step_mean_loss:0.11207107454538345, average_loss:0.15563640277735732), time, (train_step_time: 0.37687s, train_average_time: 0.39735s);(grad_norm_mean: inf, grad_norm_step: 4.97482) +2023-10-25 19:16:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 980, lr:1.960685e-05, step_mean_loss:0.009620372205972672, average_loss:0.15356474057296102), time, (train_step_time: 0.37921s, train_average_time: 0.39716s);(grad_norm_mean: inf, grad_norm_step: 0.94553) +2023-10-25 19:16:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1000, lr:1.957365e-05, step_mean_loss:0.05122983455657959, average_loss:0.15178337527997793), time, (train_step_time: 0.38790s, train_average_time: 0.39714s);(grad_norm_mean: inf, grad_norm_step: 1.35449) +2023-10-25 19:16:46 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 3, steps: 1000); +2023-10-25 19:16:49 INFO root donut_experiment.py:62 - batch inference time:2.529047966003418 s +2023-10-25 19:16:51 INFO root donut_experiment.py:62 - batch inference time:1.8726763725280762 s +2023-10-25 19:16:52 INFO root donut_experiment.py:62 - batch inference time:1.0719785690307617 s +2023-10-25 19:16:53 INFO root donut_experiment.py:62 - batch inference time:0.6997439861297607 s +2023-10-25 19:16:55 INFO root donut_experiment.py:62 - batch inference time:1.4787397384643555 s +2023-10-25 19:16:55 INFO root donut_experiment.py:62 - batch inference time:0.7961220741271973 s +2023-10-25 19:16:56 INFO root donut_experiment.py:62 - batch inference time:0.5578098297119141 s +2023-10-25 19:16:57 INFO root donut_experiment.py:62 - batch inference time:0.9424679279327393 s +2023-10-25 19:16:58 INFO root donut_experiment.py:62 - batch inference time:1.043837547302246 s +2023-10-25 19:17:00 INFO root donut_experiment.py:62 - batch inference time:1.557753086090088 s +2023-10-25 19:17:01 INFO root donut_experiment.py:62 - batch inference time:1.3106327056884766 s +2023-10-25 19:17:01 INFO root donut_experiment.py:62 - batch inference time:0.4490933418273926 s +2023-10-25 19:17:03 INFO root donut_experiment.py:62 - batch inference time:1.3971545696258545 s +2023-10-25 19:17:04 INFO root donut_experiment.py:62 - batch inference time:1.5662431716918945 s +2023-10-25 19:17:06 INFO root donut_experiment.py:62 - batch inference time:1.9533681869506836 s +2023-10-25 19:17:08 INFO root donut_experiment.py:62 - batch inference time:1.402052879333496 s +2023-10-25 19:17:09 INFO root donut_experiment.py:62 - batch inference time:1.6724672317504883 s +2023-10-25 19:17:11 INFO root donut_experiment.py:62 - batch inference time:1.5747270584106445 s +2023-10-25 19:17:11 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:17:11 INFO root donut_experiment.py:72 - token_acc: 0.3810870248193528; edit_dis: 0.11906945326359132 +2023-10-25 19:17:16 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch3_step1000_lr1.957365e-05_avg_loss0.15178_token_acc0.38109_edit_dis0.11907.pth +2023-10-25 19:17:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1020, lr:1.953913e-05, step_mean_loss:0.0775587186217308, average_loss:0.15012582686693207), time, (train_step_time: 0.38301s, train_average_time: 0.39704s);(grad_norm_mean: inf, grad_norm_step: 2.45742) +2023-10-25 19:17:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1040, lr:1.950329e-05, step_mean_loss:0.04142499342560768, average_loss:0.14894911205718436), time, (train_step_time: 0.40555s, train_average_time: 0.39722s);(grad_norm_mean: inf, grad_norm_step: 1.81204) +2023-10-25 19:17:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1060, lr:1.946616e-05, step_mean_loss:0.07239114493131638, average_loss:0.1476331710977972), time, (train_step_time: 0.47043s, train_average_time: 0.39748s);(grad_norm_mean: inf, grad_norm_step: 1.50476) +2023-10-25 19:17:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1080, lr:1.942772e-05, step_mean_loss:0.16539764404296875, average_loss:0.1465784915822075), time, (train_step_time: 0.37535s, train_average_time: 0.39744s);(grad_norm_mean: inf, grad_norm_step: 6.93631) +2023-10-25 19:17:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1100, lr:1.938798e-05, step_mean_loss:0.0730612725019455, average_loss:0.1453514872283929), time, (train_step_time: 0.38216s, train_average_time: 0.39741s);(grad_norm_mean: inf, grad_norm_step: 2.16165) +2023-10-25 19:18:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1120, lr:1.934696e-05, step_mean_loss:0.16885413229465485, average_loss:0.14393692247394937), time, (train_step_time: 0.40717s, train_average_time: 0.39738s);(grad_norm_mean: inf, grad_norm_step: 5.95481) +2023-10-25 19:18:12 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1140, lr:1.930465e-05, step_mean_loss:0.02054622583091259, average_loss:0.1424476394638125), time, (train_step_time: 0.38216s, train_average_time: 0.39746s);(grad_norm_mean: inf, grad_norm_step: 0.91738) +2023-10-25 19:18:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1160, lr:1.926107e-05, step_mean_loss:0.023741427809000015, average_loss:0.14134058630744253), time, (train_step_time: 0.36334s, train_average_time: 0.39709s);(grad_norm_mean: nan, grad_norm_step: 1.57949) +2023-10-25 19:18:24 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 3, steps: 1172); +2023-10-25 19:18:27 INFO root donut_experiment.py:62 - batch inference time:2.3827664852142334 s +2023-10-25 19:18:29 INFO root donut_experiment.py:62 - batch inference time:1.911158561706543 s +2023-10-25 19:18:30 INFO root donut_experiment.py:62 - batch inference time:1.1412696838378906 s +2023-10-25 19:18:31 INFO root donut_experiment.py:62 - batch inference time:0.7172801494598389 s +2023-10-25 19:18:32 INFO root donut_experiment.py:62 - batch inference time:1.508697271347046 s +2023-10-25 19:18:33 INFO root donut_experiment.py:62 - batch inference time:0.7832460403442383 s +2023-10-25 19:18:34 INFO root donut_experiment.py:62 - batch inference time:0.579409122467041 s +2023-10-25 19:18:35 INFO root donut_experiment.py:62 - batch inference time:1.0097923278808594 s +2023-10-25 19:18:36 INFO root donut_experiment.py:62 - batch inference time:1.0651922225952148 s +2023-10-25 19:18:37 INFO root donut_experiment.py:62 - batch inference time:1.6168501377105713 s +2023-10-25 19:18:39 INFO root donut_experiment.py:62 - batch inference time:1.5512795448303223 s +2023-10-25 19:18:39 INFO root donut_experiment.py:62 - batch inference time:0.45153188705444336 s +2023-10-25 19:18:41 INFO root donut_experiment.py:62 - batch inference time:1.4801619052886963 s +2023-10-25 19:18:42 INFO root donut_experiment.py:62 - batch inference time:1.2286255359649658 s +2023-10-25 19:18:44 INFO root donut_experiment.py:62 - batch inference time:1.7521445751190186 s +2023-10-25 19:18:45 INFO root donut_experiment.py:62 - batch inference time:1.030155897140503 s +2023-10-25 19:18:46 INFO root donut_experiment.py:62 - batch inference time:1.3495466709136963 s +2023-10-25 19:18:47 INFO root donut_experiment.py:62 - batch inference time:1.239755392074585 s +2023-10-25 19:18:47 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:18:47 INFO root donut_experiment.py:72 - token_acc: 0.31952291274325173; edit_dis: 0.1102203601110938 +2023-10-25 19:18:52 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch3_step1172_lr1.923430e-05_avg_loss0.14052_token_acc0.31952_edit_dis0.11022.pth +2023-10-25 19:18:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1180, lr:1.921621e-05, step_mean_loss:0.039486128836870193, average_loss:0.13983056494251872), time, (train_step_time: 0.46652s, train_average_time: 0.39699s);(grad_norm_mean: nan, grad_norm_step: 1.07401) +2023-10-25 19:19:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1200, lr:1.917008e-05, step_mean_loss:0.05359914153814316, average_loss:0.13818718853154374), time, (train_step_time: 0.37711s, train_average_time: 0.39697s);(grad_norm_mean: nan, grad_norm_step: 6.42924) +2023-10-25 19:19:12 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1220, lr:1.912270e-05, step_mean_loss:0.06125006824731827, average_loss:0.13685458610445017), time, (train_step_time: 0.37930s, train_average_time: 0.39696s);(grad_norm_mean: nan, grad_norm_step: 5.69267) +2023-10-25 19:19:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1240, lr:1.907406e-05, step_mean_loss:0.10713247954845428, average_loss:0.13540920105597545), time, (train_step_time: 0.38898s, train_average_time: 0.39697s);(grad_norm_mean: nan, grad_norm_step: 2.83706) +2023-10-25 19:19:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1260, lr:1.902417e-05, step_mean_loss:0.07492990046739578, average_loss:0.13441412590170604), time, (train_step_time: 0.39589s, train_average_time: 0.39689s);(grad_norm_mean: nan, grad_norm_step: 2.52621) +2023-10-25 19:19:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1280, lr:1.897304e-05, step_mean_loss:0.009256553836166859, average_loss:0.1333238696861372), time, (train_step_time: 0.41449s, train_average_time: 0.39711s);(grad_norm_mean: nan, grad_norm_step: 0.48929) +2023-10-25 19:19:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1300, lr:1.892068e-05, step_mean_loss:0.039662234485149384, average_loss:0.13178730672601466), time, (train_step_time: 0.38100s, train_average_time: 0.39695s);(grad_norm_mean: nan, grad_norm_step: 1.15568) +2023-10-25 19:19:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1320, lr:1.886710e-05, step_mean_loss:0.0166995357722044, average_loss:0.13071458834650304), time, (train_step_time: 0.39324s, train_average_time: 0.39688s);(grad_norm_mean: nan, grad_norm_step: 1.53995) +2023-10-25 19:19:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1340, lr:1.881230e-05, step_mean_loss:0.03153667226433754, average_loss:0.12946615473926762), time, (train_step_time: 0.39586s, train_average_time: 0.39678s);(grad_norm_mean: nan, grad_norm_step: 2.05279) +2023-10-25 19:20:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1360, lr:1.875628e-05, step_mean_loss:0.04749425873160362, average_loss:0.1281220767997986), time, (train_step_time: 0.39603s, train_average_time: 0.39668s);(grad_norm_mean: nan, grad_norm_step: 3.33762) +2023-10-25 19:20:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1380, lr:1.869906e-05, step_mean_loss:0.041260577738285065, average_loss:0.12716382084012814), time, (train_step_time: 0.37699s, train_average_time: 0.39674s);(grad_norm_mean: nan, grad_norm_step: 1.48046) +2023-10-25 19:20:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1400, lr:1.864065e-05, step_mean_loss:0.022121421992778778, average_loss:0.12590188491973095), time, (train_step_time: 0.41124s, train_average_time: 0.39670s);(grad_norm_mean: nan, grad_norm_step: 1.83020) +2023-10-25 19:20:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1420, lr:1.858105e-05, step_mean_loss:0.13241195678710938, average_loss:0.1251592735604147), time, (train_step_time: 0.37732s, train_average_time: 0.39678s);(grad_norm_mean: nan, grad_norm_step: 4.57212) +2023-10-25 19:20:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1440, lr:1.852027e-05, step_mean_loss:0.059768714010715485, average_loss:0.12438289470139555), time, (train_step_time: 0.38531s, train_average_time: 0.39670s);(grad_norm_mean: nan, grad_norm_step: 2.16428) +2023-10-25 19:20:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1460, lr:1.845832e-05, step_mean_loss:0.024644723162055016, average_loss:0.12326771238968669), time, (train_step_time: 0.36502s, train_average_time: 0.39639s);(grad_norm_mean: nan, grad_norm_step: 3.01078) +2023-10-25 19:20:49 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 4, steps: 1465); +2023-10-25 19:20:52 INFO root donut_experiment.py:62 - batch inference time:2.5184485912323 s +2023-10-25 19:20:53 INFO root donut_experiment.py:62 - batch inference time:1.8882637023925781 s +2023-10-25 19:20:55 INFO root donut_experiment.py:62 - batch inference time:1.0996057987213135 s +2023-10-25 19:20:55 INFO root donut_experiment.py:62 - batch inference time:0.6856095790863037 s +2023-10-25 19:20:57 INFO root donut_experiment.py:62 - batch inference time:1.5162124633789062 s +2023-10-25 19:20:58 INFO root donut_experiment.py:62 - batch inference time:0.803652286529541 s +2023-10-25 19:20:58 INFO root donut_experiment.py:62 - batch inference time:0.5616521835327148 s +2023-10-25 19:20:59 INFO root donut_experiment.py:62 - batch inference time:1.0017986297607422 s +2023-10-25 19:21:00 INFO root donut_experiment.py:62 - batch inference time:1.0571553707122803 s +2023-10-25 19:21:02 INFO root donut_experiment.py:62 - batch inference time:1.582270622253418 s +2023-10-25 19:21:03 INFO root donut_experiment.py:62 - batch inference time:1.4295618534088135 s +2023-10-25 19:21:04 INFO root donut_experiment.py:62 - batch inference time:0.452986478805542 s +2023-10-25 19:21:05 INFO root donut_experiment.py:62 - batch inference time:1.481482744216919 s +2023-10-25 19:21:06 INFO root donut_experiment.py:62 - batch inference time:1.239530086517334 s +2023-10-25 19:21:08 INFO root donut_experiment.py:62 - batch inference time:1.4525580406188965 s +2023-10-25 19:21:17 INFO root donut_experiment.py:62 - batch inference time:8.871754169464111 s +2023-10-25 19:21:18 INFO root donut_experiment.py:62 - batch inference time:1.3913288116455078 s +2023-10-25 19:21:19 INFO root donut_experiment.py:62 - batch inference time:1.2313542366027832 s +2023-10-25 19:21:19 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:21:19 INFO root donut_experiment.py:72 - token_acc: 0.32810894141829394; edit_dis: 0.2618601571781231 +2023-10-25 19:21:24 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch4_step1465_lr1.844265e-05_avg_loss0.12295_token_acc0.32811_edit_dis0.26186.pth +2023-10-25 19:21:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1480, lr:1.839521e-05, step_mean_loss:0.027857623994350433, average_loss:0.12212191415129456), time, (train_step_time: 0.42990s, train_average_time: 0.39629s);(grad_norm_mean: nan, grad_norm_step: 1.03806) +2023-10-25 19:21:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1500, lr:1.833094e-05, step_mean_loss:0.06840557605028152, average_loss:0.12091812696540728), time, (train_step_time: 0.37525s, train_average_time: 0.39628s);(grad_norm_mean: nan, grad_norm_step: 3.51033) +2023-10-25 19:21:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1520, lr:1.826553e-05, step_mean_loss:0.04237540438771248, average_loss:0.1197606913649667), time, (train_step_time: 0.41044s, train_average_time: 0.39625s);(grad_norm_mean: nan, grad_norm_step: 3.87734) +2023-10-25 19:21:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1540, lr:1.819898e-05, step_mean_loss:0.030773498117923737, average_loss:0.11861220485774207), time, (train_step_time: 0.38722s, train_average_time: 0.39631s);(grad_norm_mean: nan, grad_norm_step: 1.52852) +2023-10-25 19:22:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1560, lr:1.813131e-05, step_mean_loss:0.017746632918715477, average_loss:0.11820541849600164), time, (train_step_time: 0.42883s, train_average_time: 0.39629s);(grad_norm_mean: nan, grad_norm_step: 1.33739) +2023-10-25 19:22:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1580, lr:1.806252e-05, step_mean_loss:0.17850908637046814, average_loss:0.11739146376265909), time, (train_step_time: 0.37661s, train_average_time: 0.39628s);(grad_norm_mean: nan, grad_norm_step: 7.56648) +2023-10-25 19:22:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1600, lr:1.799262e-05, step_mean_loss:0.13127954304218292, average_loss:0.11681331234845857), time, (train_step_time: 0.40570s, train_average_time: 0.39616s);(grad_norm_mean: nan, grad_norm_step: 4.30940) +2023-10-25 19:22:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1620, lr:1.792162e-05, step_mean_loss:0.12813597917556763, average_loss:0.11594352745989993), time, (train_step_time: 0.37781s, train_average_time: 0.39608s);(grad_norm_mean: nan, grad_norm_step: 5.36072) +2023-10-25 19:22:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1640, lr:1.784954e-05, step_mean_loss:0.02059217542409897, average_loss:0.11489719690906917), time, (train_step_time: 0.41914s, train_average_time: 0.39611s);(grad_norm_mean: nan, grad_norm_step: 0.89021) +2023-10-25 19:22:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1660, lr:1.777638e-05, step_mean_loss:0.040360067039728165, average_loss:0.11395950156442136), time, (train_step_time: 0.39266s, train_average_time: 0.39615s);(grad_norm_mean: nan, grad_norm_step: 4.67147) +2023-10-25 19:22:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1680, lr:1.770214e-05, step_mean_loss:0.024060579016804695, average_loss:0.11300703983974277), time, (train_step_time: 0.37722s, train_average_time: 0.39610s);(grad_norm_mean: nan, grad_norm_step: 2.49850) +2023-10-25 19:22:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1700, lr:1.762685e-05, step_mean_loss:0.025022966787219048, average_loss:0.11210727186620181), time, (train_step_time: 0.38945s, train_average_time: 0.39618s);(grad_norm_mean: nan, grad_norm_step: 1.08526) +2023-10-25 19:23:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1720, lr:1.755051e-05, step_mean_loss:0.006281794048845768, average_loss:0.11138071540416879), time, (train_step_time: 0.38364s, train_average_time: 0.39619s);(grad_norm_mean: nan, grad_norm_step: 0.30329) +2023-10-25 19:23:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1740, lr:1.747314e-05, step_mean_loss:0.033658821135759354, average_loss:0.11052232328447749), time, (train_step_time: 0.38053s, train_average_time: 0.39624s);(grad_norm_mean: nan, grad_norm_step: 1.09280) +2023-10-25 19:23:20 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 5, steps: 1758); +2023-10-25 19:23:23 INFO root donut_experiment.py:62 - batch inference time:1.9462549686431885 s +2023-10-25 19:23:25 INFO root donut_experiment.py:62 - batch inference time:1.8837878704071045 s +2023-10-25 19:23:26 INFO root donut_experiment.py:62 - batch inference time:1.1185743808746338 s +2023-10-25 19:23:27 INFO root donut_experiment.py:62 - batch inference time:0.8281958103179932 s +2023-10-25 19:23:28 INFO root donut_experiment.py:62 - batch inference time:1.508307695388794 s +2023-10-25 19:23:29 INFO root donut_experiment.py:62 - batch inference time:0.8477640151977539 s +2023-10-25 19:23:30 INFO root donut_experiment.py:62 - batch inference time:0.5664331912994385 s +2023-10-25 19:23:31 INFO root donut_experiment.py:62 - batch inference time:0.9780616760253906 s +2023-10-25 19:23:32 INFO root donut_experiment.py:62 - batch inference time:1.0732648372650146 s +2023-10-25 19:23:33 INFO root donut_experiment.py:62 - batch inference time:1.587028980255127 s +2023-10-25 19:23:35 INFO root donut_experiment.py:62 - batch inference time:1.5578551292419434 s +2023-10-25 19:23:35 INFO root donut_experiment.py:62 - batch inference time:0.45682573318481445 s +2023-10-25 19:23:37 INFO root donut_experiment.py:62 - batch inference time:1.479515790939331 s +2023-10-25 19:23:38 INFO root donut_experiment.py:62 - batch inference time:1.1270129680633545 s +2023-10-25 19:23:40 INFO root donut_experiment.py:62 - batch inference time:1.4810667037963867 s +2023-10-25 19:23:41 INFO root donut_experiment.py:62 - batch inference time:1.0907313823699951 s +2023-10-25 19:23:42 INFO root donut_experiment.py:62 - batch inference time:1.3650221824645996 s +2023-10-25 19:23:43 INFO root donut_experiment.py:62 - batch inference time:1.2439630031585693 s +2023-10-25 19:23:43 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:23:43 INFO root donut_experiment.py:72 - token_acc: 0.3533811156123403; edit_dis: 0.14771876369740555 +2023-10-25 19:23:48 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch5_step1758_lr1.740262e-05_avg_loss0.10969_token_acc0.35338_edit_dis0.14772.pth +2023-10-25 19:23:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1760, lr:1.739473e-05, step_mean_loss:0.03164156526327133, average_loss:0.10961604201603967), time, (train_step_time: 0.44759s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 1.49214) +2023-10-25 19:23:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1780, lr:1.731531e-05, step_mean_loss:0.07195785641670227, average_loss:0.10895707345629121), time, (train_step_time: 0.39119s, train_average_time: 0.39609s);(grad_norm_mean: nan, grad_norm_step: 3.26359) +2023-10-25 19:24:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1800, lr:1.723489e-05, step_mean_loss:0.575063169002533, average_loss:0.10842665207685463), time, (train_step_time: 0.38083s, train_average_time: 0.39608s);(grad_norm_mean: nan, grad_norm_step: 9.33539) +2023-10-25 19:24:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1820, lr:1.715347e-05, step_mean_loss:0.007139054127037525, average_loss:0.1075858720724412), time, (train_step_time: 0.39903s, train_average_time: 0.39603s);(grad_norm_mean: nan, grad_norm_step: 0.97389) +2023-10-25 19:24:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1840, lr:1.707107e-05, step_mean_loss:0.03232637792825699, average_loss:0.10677910206232691), time, (train_step_time: 0.48842s, train_average_time: 0.39610s);(grad_norm_mean: nan, grad_norm_step: 1.24426) +2023-10-25 19:24:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1860, lr:1.698769e-05, step_mean_loss:0.007328196428716183, average_loss:0.10589946637306268), time, (train_step_time: 0.42066s, train_average_time: 0.39617s);(grad_norm_mean: nan, grad_norm_step: 0.35186) +2023-10-25 19:24:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1880, lr:1.690336e-05, step_mean_loss:0.009831924922764301, average_loss:0.10516941138197039), time, (train_step_time: 0.39435s, train_average_time: 0.39622s);(grad_norm_mean: nan, grad_norm_step: 0.95801) +2023-10-25 19:24:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1900, lr:1.681808e-05, step_mean_loss:0.004908167757093906, average_loss:0.10440522663310586), time, (train_step_time: 0.37855s, train_average_time: 0.39620s);(grad_norm_mean: nan, grad_norm_step: 0.58469) +2023-10-25 19:24:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1920, lr:1.673186e-05, step_mean_loss:0.0241272933781147, average_loss:0.10358858442690083), time, (train_step_time: 0.37591s, train_average_time: 0.39612s);(grad_norm_mean: nan, grad_norm_step: 2.57635) +2023-10-25 19:25:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1940, lr:1.664471e-05, step_mean_loss:0.022997092455625534, average_loss:0.10298789716418713), time, (train_step_time: 0.38675s, train_average_time: 0.39617s);(grad_norm_mean: nan, grad_norm_step: 1.00169) +2023-10-25 19:25:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1960, lr:1.655666e-05, step_mean_loss:0.057676855474710464, average_loss:0.10231107292208008), time, (train_step_time: 0.38342s, train_average_time: 0.39615s);(grad_norm_mean: nan, grad_norm_step: 2.65133) +2023-10-25 19:25:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1980, lr:1.646770e-05, step_mean_loss:0.0019362394232302904, average_loss:0.10150196241215459), time, (train_step_time: 0.37508s, train_average_time: 0.39609s);(grad_norm_mean: nan, grad_norm_step: 0.13453) +2023-10-25 19:25:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2000, lr:1.637785e-05, step_mean_loss:0.03065965324640274, average_loss:0.10091033567246632), time, (train_step_time: 0.40716s, train_average_time: 0.39612s);(grad_norm_mean: nan, grad_norm_step: 1.26359) +2023-10-25 19:25:25 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 6, steps: 2000); +2023-10-25 19:25:28 INFO root donut_experiment.py:62 - batch inference time:2.531747341156006 s +2023-10-25 19:25:30 INFO root donut_experiment.py:62 - batch inference time:1.8818840980529785 s +2023-10-25 19:25:31 INFO root donut_experiment.py:62 - batch inference time:1.1001005172729492 s +2023-10-25 19:25:32 INFO root donut_experiment.py:62 - batch inference time:0.7171766757965088 s +2023-10-25 19:25:33 INFO root donut_experiment.py:62 - batch inference time:1.643049955368042 s +2023-10-25 19:25:34 INFO root donut_experiment.py:62 - batch inference time:0.8078625202178955 s +2023-10-25 19:25:35 INFO root donut_experiment.py:62 - batch inference time:0.5650136470794678 s +2023-10-25 19:25:36 INFO root donut_experiment.py:62 - batch inference time:1.0238316059112549 s +2023-10-25 19:25:37 INFO root donut_experiment.py:62 - batch inference time:1.0582787990570068 s +2023-10-25 19:25:38 INFO root donut_experiment.py:62 - batch inference time:1.5965800285339355 s +2023-10-25 19:25:40 INFO root donut_experiment.py:62 - batch inference time:1.5561773777008057 s +2023-10-25 19:25:41 INFO root donut_experiment.py:62 - batch inference time:0.49555110931396484 s +2023-10-25 19:25:42 INFO root donut_experiment.py:62 - batch inference time:1.4942476749420166 s +2023-10-25 19:25:43 INFO root donut_experiment.py:62 - batch inference time:1.1683948040008545 s +2023-10-25 19:25:45 INFO root donut_experiment.py:62 - batch inference time:1.5557801723480225 s +2023-10-25 19:25:46 INFO root donut_experiment.py:62 - batch inference time:1.1969704627990723 s +2023-10-25 19:25:47 INFO root donut_experiment.py:62 - batch inference time:1.4429864883422852 s +2023-10-25 19:25:49 INFO root donut_experiment.py:62 - batch inference time:1.241525411605835 s +2023-10-25 19:25:49 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:25:49 INFO root donut_experiment.py:72 - token_acc: 0.41908325537885877; edit_dis: 0.1034391902165529 +2023-10-25 19:25:53 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch6_step2000_lr1.637785e-05_avg_loss0.10091_token_acc0.41908_edit_dis0.10344.pth +2023-10-25 19:26:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2020, lr:1.628713e-05, step_mean_loss:0.09047757089138031, average_loss:0.10020903417763374), time, (train_step_time: 0.38452s, train_average_time: 0.39613s);(grad_norm_mean: nan, grad_norm_step: 5.28813) +2023-10-25 19:26:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2040, lr:1.619554e-05, step_mean_loss:0.010055015794932842, average_loss:0.09941102700352478), time, (train_step_time: 0.36692s, train_average_time: 0.39597s);(grad_norm_mean: nan, grad_norm_step: 0.62988) +2023-10-25 19:26:13 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 6, steps: 2051); +2023-10-25 19:26:16 INFO root donut_experiment.py:62 - batch inference time:2.612647533416748 s +2023-10-25 19:26:18 INFO root donut_experiment.py:62 - batch inference time:1.9401755332946777 s +2023-10-25 19:26:19 INFO root donut_experiment.py:62 - batch inference time:1.1120920181274414 s +2023-10-25 19:26:20 INFO root donut_experiment.py:62 - batch inference time:0.7843360900878906 s +2023-10-25 19:26:21 INFO root donut_experiment.py:62 - batch inference time:1.5191128253936768 s +2023-10-25 19:26:22 INFO root donut_experiment.py:62 - batch inference time:0.8058514595031738 s +2023-10-25 19:26:23 INFO root donut_experiment.py:62 - batch inference time:0.5661990642547607 s +2023-10-25 19:26:24 INFO root donut_experiment.py:62 - batch inference time:1.0279860496520996 s +2023-10-25 19:26:25 INFO root donut_experiment.py:62 - batch inference time:1.0661587715148926 s +2023-10-25 19:26:26 INFO root donut_experiment.py:62 - batch inference time:1.5871257781982422 s +2023-10-25 19:26:28 INFO root donut_experiment.py:62 - batch inference time:1.8504078388214111 s +2023-10-25 19:26:29 INFO root donut_experiment.py:62 - batch inference time:0.4616701602935791 s +2023-10-25 19:26:30 INFO root donut_experiment.py:62 - batch inference time:1.7046363353729248 s +2023-10-25 19:26:32 INFO root donut_experiment.py:62 - batch inference time:1.722240924835205 s +2023-10-25 19:26:34 INFO root donut_experiment.py:62 - batch inference time:1.4661364555358887 s +2023-10-25 19:26:35 INFO root donut_experiment.py:62 - batch inference time:1.1011018753051758 s +2023-10-25 19:26:36 INFO root donut_experiment.py:62 - batch inference time:1.3602077960968018 s +2023-10-25 19:26:37 INFO root donut_experiment.py:62 - batch inference time:1.2370529174804688 s +2023-10-25 19:26:37 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:26:37 INFO root donut_experiment.py:72 - token_acc: 0.31143031784841074; edit_dis: 0.14421912458691266 +2023-10-25 19:26:42 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch6_step2051_lr1.614480e-05_avg_loss0.09904_token_acc0.31143_edit_dis0.14422.pth +2023-10-25 19:26:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2060, lr:1.610310e-05, step_mean_loss:0.0020798782352358103, average_loss:0.09876939801943434), time, (train_step_time: 0.39772s, train_average_time: 0.39588s);(grad_norm_mean: nan, grad_norm_step: 0.27689) +2023-10-25 19:26:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2080, lr:1.600983e-05, step_mean_loss:0.0018644341034814715, average_loss:0.09797474995497256), time, (train_step_time: 0.40330s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 0.13576) +2023-10-25 19:27:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2100, lr:1.591572e-05, step_mean_loss:0.0447566993534565, average_loss:0.09723573261510216), time, (train_step_time: 0.38129s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 4.41039) +2023-10-25 19:27:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2120, lr:1.582081e-05, step_mean_loss:0.012131755240261555, average_loss:0.09661207614516794), time, (train_step_time: 0.41683s, train_average_time: 0.39601s);(grad_norm_mean: nan, grad_norm_step: 0.58073) +2023-10-25 19:27:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2140, lr:1.572509e-05, step_mean_loss:0.16362422704696655, average_loss:0.09644441852142335), time, (train_step_time: 0.37636s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 7.10475) +2023-10-25 19:27:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2160, lr:1.562859e-05, step_mean_loss:0.008222123607993126, average_loss:0.09576821744497768), time, (train_step_time: 0.37312s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 0.48532) +2023-10-25 19:27:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2180, lr:1.553131e-05, step_mean_loss:0.00696885772049427, average_loss:0.09516024477863973), time, (train_step_time: 0.37925s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 0.68575) +2023-10-25 19:27:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2200, lr:1.543328e-05, step_mean_loss:0.023024704307317734, average_loss:0.09464953015599431), time, (train_step_time: 0.40266s, train_average_time: 0.39604s);(grad_norm_mean: nan, grad_norm_step: 2.82864) +2023-10-25 19:27:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2220, lr:1.533449e-05, step_mean_loss:0.011845819652080536, average_loss:0.09401053152481557), time, (train_step_time: 0.37787s, train_average_time: 0.39604s);(grad_norm_mean: nan, grad_norm_step: 0.61320) +2023-10-25 19:27:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2240, lr:1.523498e-05, step_mean_loss:0.04514600336551666, average_loss:0.09334881404222739), time, (train_step_time: 0.38199s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 4.73545) +2023-10-25 19:28:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2260, lr:1.513474e-05, step_mean_loss:0.011191297322511673, average_loss:0.09272426836208318), time, (train_step_time: 0.41763s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 0.46431) +2023-10-25 19:28:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2280, lr:1.503380e-05, step_mean_loss:0.018731659278273582, average_loss:0.09214658650434056), time, (train_step_time: 0.38590s, train_average_time: 0.39603s);(grad_norm_mean: nan, grad_norm_step: 1.31150) +2023-10-25 19:28:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2300, lr:1.493217e-05, step_mean_loss:0.03046467714011669, average_loss:0.09149655974415667), time, (train_step_time: 0.36861s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 3.84055) +2023-10-25 19:28:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2320, lr:1.482986e-05, step_mean_loss:0.0007629995234310627, average_loss:0.09090022511256142), time, (train_step_time: 0.42252s, train_average_time: 0.39605s);(grad_norm_mean: nan, grad_norm_step: 0.04880) +2023-10-25 19:28:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2340, lr:1.472689e-05, step_mean_loss:0.05884425714612007, average_loss:0.09033072940652252), time, (train_step_time: 0.37344s, train_average_time: 0.39589s);(grad_norm_mean: nan, grad_norm_step: 7.58820) +2023-10-25 19:28:39 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 7, steps: 2344); +2023-10-25 19:28:41 INFO root donut_experiment.py:62 - batch inference time:1.4545378684997559 s +2023-10-25 19:28:42 INFO root donut_experiment.py:62 - batch inference time:1.8968727588653564 s +2023-10-25 19:28:44 INFO root donut_experiment.py:62 - batch inference time:1.1178045272827148 s +2023-10-25 19:28:44 INFO root donut_experiment.py:62 - batch inference time:0.6892330646514893 s +2023-10-25 19:28:46 INFO root donut_experiment.py:62 - batch inference time:1.5046970844268799 s +2023-10-25 19:28:47 INFO root donut_experiment.py:62 - batch inference time:0.7872631549835205 s +2023-10-25 19:28:47 INFO root donut_experiment.py:62 - batch inference time:0.5596578121185303 s +2023-10-25 19:28:48 INFO root donut_experiment.py:62 - batch inference time:1.0177021026611328 s +2023-10-25 19:28:49 INFO root donut_experiment.py:62 - batch inference time:1.0861904621124268 s +2023-10-25 19:28:51 INFO root donut_experiment.py:62 - batch inference time:1.5857818126678467 s +2023-10-25 19:29:00 INFO root donut_experiment.py:62 - batch inference time:8.769229650497437 s +2023-10-25 19:29:00 INFO root donut_experiment.py:62 - batch inference time:0.4963212013244629 s +2023-10-25 19:29:02 INFO root donut_experiment.py:62 - batch inference time:1.6766767501831055 s +2023-10-25 19:29:03 INFO root donut_experiment.py:62 - batch inference time:1.1569960117340088 s +2023-10-25 19:29:04 INFO root donut_experiment.py:62 - batch inference time:1.4015305042266846 s +2023-10-25 19:29:05 INFO root donut_experiment.py:62 - batch inference time:1.0896975994110107 s +2023-10-25 19:29:07 INFO root donut_experiment.py:62 - batch inference time:1.4308149814605713 s +2023-10-25 19:29:08 INFO root donut_experiment.py:62 - batch inference time:1.212935447692871 s +2023-10-25 19:29:08 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:29:08 INFO root donut_experiment.py:72 - token_acc: 0.3070603337612324; edit_dis: 0.2944520442149814 +2023-10-25 19:29:13 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch7_step2344_lr1.470621e-05_avg_loss0.09023_token_acc0.30706_edit_dis0.29445.pth +2023-10-25 19:29:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2360, lr:1.462326e-05, step_mean_loss:0.011914233677089214, average_loss:0.0896891112081456), time, (train_step_time: 0.39415s, train_average_time: 0.39592s);(grad_norm_mean: nan, grad_norm_step: 1.22014) +2023-10-25 19:29:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2380, lr:1.451900e-05, step_mean_loss:0.0006502856267616153, average_loss:0.08912412567216288), time, (train_step_time: 0.39658s, train_average_time: 0.39590s);(grad_norm_mean: nan, grad_norm_step: 0.04135) +2023-10-25 19:29:35 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2400, lr:1.441412e-05, step_mean_loss:0.002452630316838622, average_loss:0.08858651440510584), time, (train_step_time: 0.44478s, train_average_time: 0.39592s);(grad_norm_mean: nan, grad_norm_step: 0.15331) +2023-10-25 19:29:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2420, lr:1.430864e-05, step_mean_loss:0.007520897779613733, average_loss:0.08805139368380255), time, (train_step_time: 0.37825s, train_average_time: 0.39598s);(grad_norm_mean: nan, grad_norm_step: 0.51389) +2023-10-25 19:29:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2440, lr:1.420256e-05, step_mean_loss:0.014658210799098015, average_loss:0.0874596083640516), time, (train_step_time: 0.39721s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 1.28672) +2023-10-25 19:29:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2460, lr:1.409590e-05, step_mean_loss:0.004123172722756863, average_loss:0.08686181401407508), time, (train_step_time: 0.40584s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 0.44054) +2023-10-25 19:30:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2480, lr:1.398869e-05, step_mean_loss:0.0010170585010200739, average_loss:0.08628095498669051), time, (train_step_time: 0.39890s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 0.07252) +2023-10-25 19:30:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2500, lr:1.388092e-05, step_mean_loss:0.01540299691259861, average_loss:0.08573820333465701), time, (train_step_time: 0.37990s, train_average_time: 0.39610s);(grad_norm_mean: nan, grad_norm_step: 1.44033) +2023-10-25 19:30:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2520, lr:1.377262e-05, step_mean_loss:0.027717331424355507, average_loss:0.08522413535560305), time, (train_step_time: 0.42190s, train_average_time: 0.39611s);(grad_norm_mean: nan, grad_norm_step: 1.90043) +2023-10-25 19:30:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2540, lr:1.366380e-05, step_mean_loss:0.01160444226115942, average_loss:0.08470155363371935), time, (train_step_time: 0.37988s, train_average_time: 0.39612s);(grad_norm_mean: nan, grad_norm_step: 2.63481) +2023-10-25 19:30:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2560, lr:1.355448e-05, step_mean_loss:0.008528520353138447, average_loss:0.08416785653977285), time, (train_step_time: 0.38258s, train_average_time: 0.39612s);(grad_norm_mean: nan, grad_norm_step: 1.02339) +2023-10-25 19:30:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2580, lr:1.344467e-05, step_mean_loss:0.005956317763775587, average_loss:0.08367175991322516), time, (train_step_time: 0.39538s, train_average_time: 0.39614s);(grad_norm_mean: nan, grad_norm_step: 0.84175) +2023-10-25 19:30:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2600, lr:1.333439e-05, step_mean_loss:0.006175921764224768, average_loss:0.08321400855858184), time, (train_step_time: 0.43596s, train_average_time: 0.39614s);(grad_norm_mean: nan, grad_norm_step: 0.94807) +2023-10-25 19:31:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2620, lr:1.322364e-05, step_mean_loss:0.0008391166338697076, average_loss:0.08268659684037477), time, (train_step_time: 0.37280s, train_average_time: 0.39614s);(grad_norm_mean: nan, grad_norm_step: 0.05577) +2023-10-25 19:31:09 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 8, steps: 2637); +2023-10-25 19:31:13 INFO root donut_experiment.py:62 - batch inference time:2.6926181316375732 s +2023-10-25 19:31:15 INFO root donut_experiment.py:62 - batch inference time:1.9491841793060303 s +2023-10-25 19:31:16 INFO root donut_experiment.py:62 - batch inference time:1.101616621017456 s +2023-10-25 19:31:17 INFO root donut_experiment.py:62 - batch inference time:0.7457079887390137 s +2023-10-25 19:31:18 INFO root donut_experiment.py:62 - batch inference time:1.6362314224243164 s +2023-10-25 19:31:19 INFO root donut_experiment.py:62 - batch inference time:0.843740701675415 s +2023-10-25 19:31:20 INFO root donut_experiment.py:62 - batch inference time:0.5594630241394043 s +2023-10-25 19:31:21 INFO root donut_experiment.py:62 - batch inference time:1.0012898445129395 s +2023-10-25 19:31:22 INFO root donut_experiment.py:62 - batch inference time:1.0524067878723145 s +2023-10-25 19:31:23 INFO root donut_experiment.py:62 - batch inference time:1.5648419857025146 s +2023-10-25 19:31:25 INFO root donut_experiment.py:62 - batch inference time:1.65240478515625 s +2023-10-25 19:31:25 INFO root donut_experiment.py:62 - batch inference time:0.4962613582611084 s +2023-10-25 19:31:27 INFO root donut_experiment.py:62 - batch inference time:1.4774868488311768 s +2023-10-25 19:31:28 INFO root donut_experiment.py:62 - batch inference time:1.0660834312438965 s +2023-10-25 19:31:29 INFO root donut_experiment.py:62 - batch inference time:1.4206538200378418 s +2023-10-25 19:31:30 INFO root donut_experiment.py:62 - batch inference time:1.1408674716949463 s +2023-10-25 19:31:32 INFO root donut_experiment.py:62 - batch inference time:1.333655595779419 s +2023-10-25 19:31:33 INFO root donut_experiment.py:62 - batch inference time:1.227491855621338 s +2023-10-25 19:31:33 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:31:33 INFO root donut_experiment.py:72 - token_acc: 0.4143389199255121; edit_dis: 0.11325627891920312 +2023-10-25 19:31:38 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch8_step2637_lr1.312916e-05_avg_loss0.08226_token_acc0.41434_edit_dis0.11326.pth +2023-10-25 19:31:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2640, lr:1.311246e-05, step_mean_loss:0.0034610808361321688, average_loss:0.08218047038531177), time, (train_step_time: 0.38565s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 0.30218) +2023-10-25 19:31:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2660, lr:1.300085e-05, step_mean_loss:0.011043110862374306, average_loss:0.08166687337953432), time, (train_step_time: 0.38681s, train_average_time: 0.39594s);(grad_norm_mean: nan, grad_norm_step: 1.46340) +2023-10-25 19:31:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2680, lr:1.288882e-05, step_mean_loss:0.0003075826389249414, average_loss:0.08116928142750221), time, (train_step_time: 0.38809s, train_average_time: 0.39590s);(grad_norm_mean: nan, grad_norm_step: 0.03914) +2023-10-25 19:32:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2700, lr:1.277640e-05, step_mean_loss:0.03443962708115578, average_loss:0.08069618562115702), time, (train_step_time: 0.41753s, train_average_time: 0.39587s);(grad_norm_mean: nan, grad_norm_step: 2.81018) +2023-10-25 19:32:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2720, lr:1.266360e-05, step_mean_loss:0.011686171405017376, average_loss:0.08026044707557423), time, (train_step_time: 0.39540s, train_average_time: 0.39589s);(grad_norm_mean: nan, grad_norm_step: 2.19344) +2023-10-25 19:32:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2740, lr:1.255043e-05, step_mean_loss:0.024356458336114883, average_loss:0.07983429534983207), time, (train_step_time: 0.38097s, train_average_time: 0.39590s);(grad_norm_mean: nan, grad_norm_step: 2.33706) +2023-10-25 19:32:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2760, lr:1.243691e-05, step_mean_loss:0.016221482306718826, average_loss:0.07936582108322646), time, (train_step_time: 0.38367s, train_average_time: 0.39589s);(grad_norm_mean: nan, grad_norm_step: 1.56458) +2023-10-25 19:32:35 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2780, lr:1.232305e-05, step_mean_loss:0.019297756254673004, average_loss:0.07893717724619695), time, (train_step_time: 0.39770s, train_average_time: 0.39588s);(grad_norm_mean: nan, grad_norm_step: 1.35242) +2023-10-25 19:32:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2800, lr:1.220888e-05, step_mean_loss:0.06730740517377853, average_loss:0.07851740402007895), time, (train_step_time: 0.39196s, train_average_time: 0.39593s);(grad_norm_mean: nan, grad_norm_step: 3.19613) +2023-10-25 19:32:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2820, lr:1.209440e-05, step_mean_loss:0.0038872782606631517, average_loss:0.078105334825177), time, (train_step_time: 0.40196s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 0.35234) +2023-10-25 19:32:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2840, lr:1.197964e-05, step_mean_loss:0.02060732990503311, average_loss:0.07768382463975901), time, (train_step_time: 0.40682s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 2.12470) +2023-10-25 19:33:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2860, lr:1.186460e-05, step_mean_loss:0.009904113598167896, average_loss:0.07721924891201734), time, (train_step_time: 0.39841s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 4.06425) +2023-10-25 19:33:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2880, lr:1.174931e-05, step_mean_loss:0.02150542102754116, average_loss:0.07676428399759061), time, (train_step_time: 0.39697s, train_average_time: 0.39607s);(grad_norm_mean: nan, grad_norm_step: 0.72287) +2023-10-25 19:33:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2900, lr:1.163377e-05, step_mean_loss:0.007885707542300224, average_loss:0.07630839677473855), time, (train_step_time: 0.42445s, train_average_time: 0.39608s);(grad_norm_mean: nan, grad_norm_step: 0.75130) +2023-10-25 19:33:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2920, lr:1.151802e-05, step_mean_loss:0.036149024963378906, average_loss:0.07591075175117373), time, (train_step_time: 0.36876s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 4.14332) +2023-10-25 19:33:34 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 9, steps: 2930); +2023-10-25 19:33:37 INFO root donut_experiment.py:62 - batch inference time:2.3594541549682617 s +2023-10-25 19:33:39 INFO root donut_experiment.py:62 - batch inference time:1.8924667835235596 s +2023-10-25 19:33:40 INFO root donut_experiment.py:62 - batch inference time:1.046605110168457 s +2023-10-25 19:33:41 INFO root donut_experiment.py:62 - batch inference time:0.6781840324401855 s +2023-10-25 19:33:42 INFO root donut_experiment.py:62 - batch inference time:1.4928407669067383 s +2023-10-25 19:33:43 INFO root donut_experiment.py:62 - batch inference time:0.7844088077545166 s +2023-10-25 19:33:43 INFO root donut_experiment.py:62 - batch inference time:0.557675838470459 s +2023-10-25 19:33:44 INFO root donut_experiment.py:62 - batch inference time:0.9930076599121094 s +2023-10-25 19:33:45 INFO root donut_experiment.py:62 - batch inference time:1.048340082168579 s +2023-10-25 19:33:47 INFO root donut_experiment.py:62 - batch inference time:1.5792384147644043 s +2023-10-25 19:33:49 INFO root donut_experiment.py:62 - batch inference time:1.6156468391418457 s +2023-10-25 19:33:49 INFO root donut_experiment.py:62 - batch inference time:0.45030832290649414 s +2023-10-25 19:33:51 INFO root donut_experiment.py:62 - batch inference time:1.4073565006256104 s +2023-10-25 19:33:52 INFO root donut_experiment.py:62 - batch inference time:1.140000820159912 s +2023-10-25 19:33:53 INFO root donut_experiment.py:62 - batch inference time:1.4392080307006836 s +2023-10-25 19:33:54 INFO root donut_experiment.py:62 - batch inference time:1.1105947494506836 s +2023-10-25 19:33:55 INFO root donut_experiment.py:62 - batch inference time:1.2458701133728027 s +2023-10-25 19:33:57 INFO root donut_experiment.py:62 - batch inference time:1.2278132438659668 s +2023-10-25 19:33:57 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:33:57 INFO root donut_experiment.py:72 - token_acc: 0.4354382657869934; edit_dis: 0.11669493661793284 +2023-10-25 19:34:01 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch9_step2930_lr1.146006e-05_avg_loss0.07574_token_acc0.43544_edit_dis0.11669.pth +2023-10-25 19:34:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2940, lr:1.140205e-05, step_mean_loss:0.0003247109998483211, average_loss:0.07552106976322494), time, (train_step_time: 0.38093s, train_average_time: 0.39591s);(grad_norm_mean: nan, grad_norm_step: 0.02355) +2023-10-25 19:34:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2960, lr:1.128589e-05, step_mean_loss:0.006514119915664196, average_loss:0.07508349740598803), time, (train_step_time: 0.38336s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 1.75000) +2023-10-25 19:34:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2980, lr:1.116955e-05, step_mean_loss:0.0009788618190214038, average_loss:0.07468681211444926), time, (train_step_time: 0.40175s, train_average_time: 0.39592s);(grad_norm_mean: nan, grad_norm_step: 0.07605) +2023-10-25 19:34:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3000, lr:1.105306e-05, step_mean_loss:0.0038200542330741882, average_loss:0.07429092889584232), time, (train_step_time: 0.39726s, train_average_time: 0.39590s);(grad_norm_mean: nan, grad_norm_step: 0.33520) +2023-10-25 19:34:30 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 10, steps: 3000); +2023-10-25 19:34:33 INFO root donut_experiment.py:62 - batch inference time:2.3986661434173584 s +2023-10-25 19:34:34 INFO root donut_experiment.py:62 - batch inference time:1.8792521953582764 s +2023-10-25 19:34:36 INFO root donut_experiment.py:62 - batch inference time:1.0726535320281982 s +2023-10-25 19:34:36 INFO root donut_experiment.py:62 - batch inference time:0.710536003112793 s +2023-10-25 19:34:38 INFO root donut_experiment.py:62 - batch inference time:1.4974150657653809 s +2023-10-25 19:34:39 INFO root donut_experiment.py:62 - batch inference time:0.7855119705200195 s +2023-10-25 19:34:39 INFO root donut_experiment.py:62 - batch inference time:0.55684494972229 s +2023-10-25 19:34:40 INFO root donut_experiment.py:62 - batch inference time:0.9950141906738281 s +2023-10-25 19:34:41 INFO root donut_experiment.py:62 - batch inference time:1.0873486995697021 s +2023-10-25 19:34:43 INFO root donut_experiment.py:62 - batch inference time:1.566899061203003 s +2023-10-25 19:34:45 INFO root donut_experiment.py:62 - batch inference time:2.14684796333313 s +2023-10-25 19:34:45 INFO root donut_experiment.py:62 - batch inference time:0.49546051025390625 s +2023-10-25 19:34:54 INFO root donut_experiment.py:62 - batch inference time:8.858655214309692 s +2023-10-25 19:34:56 INFO root donut_experiment.py:62 - batch inference time:1.2008700370788574 s +2023-10-25 19:34:57 INFO root donut_experiment.py:62 - batch inference time:1.4492380619049072 s +2023-10-25 19:34:58 INFO root donut_experiment.py:62 - batch inference time:1.0940182209014893 s +2023-10-25 19:35:00 INFO root donut_experiment.py:62 - batch inference time:1.4102671146392822 s +2023-10-25 19:35:01 INFO root donut_experiment.py:62 - batch inference time:1.2232460975646973 s +2023-10-25 19:35:01 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:35:01 INFO root donut_experiment.py:72 - token_acc: 0.3203485392106612; edit_dis: 0.20681046163567346 +2023-10-25 19:35:05 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch10_step3000_lr1.105306e-05_avg_loss0.07429_token_acc0.32035_edit_dis0.20681.pth +2023-10-25 19:35:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3020, lr:1.093641e-05, step_mean_loss:0.01234960462898016, average_loss:0.07389325083261652), time, (train_step_time: 0.38512s, train_average_time: 0.39589s);(grad_norm_mean: nan, grad_norm_step: 2.66188) +2023-10-25 19:35:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3040, lr:1.081964e-05, step_mean_loss:0.014834890142083168, average_loss:0.0735804060784846), time, (train_step_time: 0.41113s, train_average_time: 0.39591s);(grad_norm_mean: nan, grad_norm_step: 0.59596) +2023-10-25 19:35:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3060, lr:1.070276e-05, step_mean_loss:0.003860659198835492, average_loss:0.07315191983731233), time, (train_step_time: 0.39760s, train_average_time: 0.39588s);(grad_norm_mean: nan, grad_norm_step: 0.21968) +2023-10-25 19:35:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3080, lr:1.058578e-05, step_mean_loss:0.0007426597876474261, average_loss:0.07280800035943874), time, (train_step_time: 0.43472s, train_average_time: 0.39592s);(grad_norm_mean: nan, grad_norm_step: 0.09205) +2023-10-25 19:35:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3100, lr:1.046872e-05, step_mean_loss:0.0031531243585050106, average_loss:0.07246530324591027), time, (train_step_time: 0.41823s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 0.36443) +2023-10-25 19:35:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3120, lr:1.035160e-05, step_mean_loss:0.00761839747428894, average_loss:0.07210461293439623), time, (train_step_time: 0.42017s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 1.05379) +2023-10-25 19:36:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3140, lr:1.023443e-05, step_mean_loss:0.0015382908750325441, average_loss:0.07174694869019128), time, (train_step_time: 0.38136s, train_average_time: 0.39596s);(grad_norm_mean: nan, grad_norm_step: 0.12019) +2023-10-25 19:36:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3160, lr:1.011722e-05, step_mean_loss:0.0013042185455560684, average_loss:0.0713665251971432), time, (train_step_time: 0.38544s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 0.16626) +2023-10-25 19:36:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3180, lr:1.000000e-05, step_mean_loss:0.010588837787508965, average_loss:0.07101049317834908), time, (train_step_time: 0.39470s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 1.06297) +2023-10-25 19:36:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3200, lr:9.882779e-06, step_mean_loss:0.04339747130870819, average_loss:0.07067690875844619), time, (train_step_time: 0.38844s, train_average_time: 0.39604s);(grad_norm_mean: nan, grad_norm_step: 3.72172) +2023-10-25 19:36:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3220, lr:9.765574e-06, step_mean_loss:0.0032899975776672363, average_loss:0.07031946185517023), time, (train_step_time: 0.36586s, train_average_time: 0.39588s);(grad_norm_mean: nan, grad_norm_step: 0.59844) +2023-10-25 19:36:33 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 10, steps: 3223); +2023-10-25 19:36:36 INFO root donut_experiment.py:62 - batch inference time:2.395021915435791 s +2023-10-25 19:36:38 INFO root donut_experiment.py:62 - batch inference time:1.9025702476501465 s +2023-10-25 19:36:39 INFO root donut_experiment.py:62 - batch inference time:1.0560078620910645 s +2023-10-25 19:36:40 INFO root donut_experiment.py:62 - batch inference time:0.714181661605835 s +2023-10-25 19:36:42 INFO root donut_experiment.py:62 - batch inference time:1.5023984909057617 s +2023-10-25 19:36:42 INFO root donut_experiment.py:62 - batch inference time:0.8789529800415039 s +2023-10-25 19:36:43 INFO root donut_experiment.py:62 - batch inference time:0.5627140998840332 s +2023-10-25 19:36:44 INFO root donut_experiment.py:62 - batch inference time:0.9776473045349121 s +2023-10-25 19:36:45 INFO root donut_experiment.py:62 - batch inference time:1.0898349285125732 s +2023-10-25 19:36:47 INFO root donut_experiment.py:62 - batch inference time:1.5904808044433594 s +2023-10-25 19:36:49 INFO root donut_experiment.py:62 - batch inference time:2.0505449771881104 s +2023-10-25 19:36:49 INFO root donut_experiment.py:62 - batch inference time:0.45125317573547363 s +2023-10-25 19:36:51 INFO root donut_experiment.py:62 - batch inference time:1.4644596576690674 s +2023-10-25 19:36:52 INFO root donut_experiment.py:62 - batch inference time:1.2644414901733398 s +2023-10-25 19:36:54 INFO root donut_experiment.py:62 - batch inference time:1.56856107711792 s +2023-10-25 19:36:55 INFO root donut_experiment.py:62 - batch inference time:1.1188952922821045 s +2023-10-25 19:36:56 INFO root donut_experiment.py:62 - batch inference time:1.3626174926757812 s +2023-10-25 19:36:57 INFO root donut_experiment.py:62 - batch inference time:1.2353510856628418 s +2023-10-25 19:36:57 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:36:57 INFO root donut_experiment.py:72 - token_acc: 0.3914788514973757; edit_dis: 0.12245849531372523 +2023-10-25 19:37:02 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch10_step3223_lr9.747996e-06_avg_loss0.07028_token_acc0.39148_edit_dis0.12246.pth +2023-10-25 19:37:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3240, lr:9.648402e-06, step_mean_loss:0.006916874088346958, average_loss:0.06995933341249276), time, (train_step_time: 0.40671s, train_average_time: 0.39586s);(grad_norm_mean: nan, grad_norm_step: 0.58952) +2023-10-25 19:37:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3260, lr:9.531277e-06, step_mean_loss:0.0314754843711853, average_loss:0.06959956166727888), time, (train_step_time: 0.40837s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 1.53829) +2023-10-25 19:37:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3280, lr:9.414217e-06, step_mean_loss:0.011950640007853508, average_loss:0.06921022051601318), time, (train_step_time: 0.37861s, train_average_time: 0.39598s);(grad_norm_mean: nan, grad_norm_step: 1.99014) +2023-10-25 19:37:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3300, lr:9.297238e-06, step_mean_loss:0.0036429434549063444, average_loss:0.06884590250249004), time, (train_step_time: 0.38103s, train_average_time: 0.39600s);(grad_norm_mean: nan, grad_norm_step: 0.44998) +2023-10-25 19:37:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3320, lr:9.180355e-06, step_mean_loss:0.007598962169140577, average_loss:0.06848044470544741), time, (train_step_time: 0.46481s, train_average_time: 0.39602s);(grad_norm_mean: nan, grad_norm_step: 0.76468) +2023-10-25 19:37:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3340, lr:9.063585e-06, step_mean_loss:0.02388167940080166, average_loss:0.06812998551007379), time, (train_step_time: 0.38065s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 2.00073) +2023-10-25 19:37:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3360, lr:8.946944e-06, step_mean_loss:0.0005760741187259555, average_loss:0.06781827919023618), time, (train_step_time: 0.41308s, train_average_time: 0.39598s);(grad_norm_mean: nan, grad_norm_step: 0.08112) +2023-10-25 19:38:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3380, lr:8.830447e-06, step_mean_loss:0.01213748101145029, average_loss:0.06746072511334808), time, (train_step_time: 0.37331s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 2.03351) +2023-10-25 19:38:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3400, lr:8.714111e-06, step_mean_loss:0.011103244498372078, average_loss:0.06710944909803135), time, (train_step_time: 0.37522s, train_average_time: 0.39597s);(grad_norm_mean: nan, grad_norm_step: 1.55217) +2023-10-25 19:38:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3420, lr:8.597951e-06, step_mean_loss:0.0036638344172388315, average_loss:0.06682765836458894), time, (train_step_time: 0.37678s, train_average_time: 0.39599s);(grad_norm_mean: nan, grad_norm_step: 0.40566) +2023-10-25 19:38:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3440, lr:8.481985e-06, step_mean_loss:0.0018416099483147264, average_loss:0.06646821779900905), time, (train_step_time: 0.40103s, train_average_time: 0.39598s);(grad_norm_mean: nan, grad_norm_step: 0.24177) +2023-10-25 19:38:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3460, lr:8.366226e-06, step_mean_loss:0.007221085485070944, average_loss:0.06614542168693706), time, (train_step_time: 0.37427s, train_average_time: 0.39594s);(grad_norm_mean: nan, grad_norm_step: 3.42210) +2023-10-25 19:38:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3480, lr:8.250693e-06, step_mean_loss:0.0009404111769981682, average_loss:0.06582346671369113), time, (train_step_time: 0.38969s, train_average_time: 0.39592s);(grad_norm_mean: nan, grad_norm_step: 0.06899) +2023-10-25 19:38:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3500, lr:8.135399e-06, step_mean_loss:0.013384915888309479, average_loss:0.06547870373670593), time, (train_step_time: 0.35781s, train_average_time: 0.39587s);(grad_norm_mean: nan, grad_norm_step: 1.28284) +2023-10-25 19:38:58 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 11, steps: 3516); +2023-10-25 19:39:01 INFO root donut_experiment.py:62 - batch inference time:2.3619673252105713 s +2023-10-25 19:39:03 INFO root donut_experiment.py:62 - batch inference time:1.9013361930847168 s +2023-10-25 19:39:04 INFO root donut_experiment.py:62 - batch inference time:1.1086032390594482 s +2023-10-25 19:39:05 INFO root donut_experiment.py:62 - batch inference time:0.707066535949707 s +2023-10-25 19:39:06 INFO root donut_experiment.py:62 - batch inference time:1.5104074478149414 s +2023-10-25 19:39:07 INFO root donut_experiment.py:62 - batch inference time:0.8114278316497803 s +2023-10-25 19:39:08 INFO root donut_experiment.py:62 - batch inference time:0.5610544681549072 s +2023-10-25 19:39:09 INFO root donut_experiment.py:62 - batch inference time:0.9407057762145996 s +2023-10-25 19:39:10 INFO root donut_experiment.py:62 - batch inference time:1.0532565116882324 s +2023-10-25 19:39:11 INFO root donut_experiment.py:62 - batch inference time:1.604191541671753 s +2023-10-25 19:39:13 INFO root donut_experiment.py:62 - batch inference time:1.6523535251617432 s +2023-10-25 19:39:13 INFO root donut_experiment.py:62 - batch inference time:0.4539768695831299 s +2023-10-25 19:39:15 INFO root donut_experiment.py:62 - batch inference time:1.4737849235534668 s +2023-10-25 19:39:16 INFO root donut_experiment.py:62 - batch inference time:1.2090094089508057 s +2023-10-25 19:39:18 INFO root donut_experiment.py:62 - batch inference time:1.7875266075134277 s +2023-10-25 19:39:19 INFO root donut_experiment.py:62 - batch inference time:1.119464635848999 s +2023-10-25 19:39:20 INFO root donut_experiment.py:62 - batch inference time:1.353917121887207 s +2023-10-25 19:39:22 INFO root donut_experiment.py:62 - batch inference time:1.2268445491790771 s +2023-10-25 19:39:22 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:39:22 INFO root donut_experiment.py:72 - token_acc: 0.37484355444305384; edit_dis: 0.10903100826717091 +2023-10-25 19:39:26 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch11_step3516_lr8.043349e-06_avg_loss0.06522_token_acc0.37484_edit_dis0.10903.pth +2023-10-25 19:39:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3520, lr:8.020362e-06, step_mean_loss:0.08956998586654663, average_loss:0.0651788587472476), time, (train_step_time: 0.43742s, train_average_time: 0.39582s);(grad_norm_mean: nan, grad_norm_step: 4.54199) +2023-10-25 19:39:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3540, lr:7.905597e-06, step_mean_loss:0.0004225101147312671, average_loss:0.06485344318215985), time, (train_step_time: 0.39230s, train_average_time: 0.39577s);(grad_norm_mean: nan, grad_norm_step: 0.03527) +2023-10-25 19:39:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3560, lr:7.791120e-06, step_mean_loss:0.0001897893234854564, average_loss:0.0645054372384958), time, (train_step_time: 0.38759s, train_average_time: 0.39580s);(grad_norm_mean: nan, grad_norm_step: 0.02189) +2023-10-25 19:39:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3580, lr:7.676946e-06, step_mean_loss:0.00278597604483366, average_loss:0.06421184287549463), time, (train_step_time: 0.44909s, train_average_time: 0.39583s);(grad_norm_mean: nan, grad_norm_step: 0.48626) +2023-10-25 19:40:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3600, lr:7.563092e-06, step_mean_loss:0.14591604471206665, average_loss:0.06397710735821723), time, (train_step_time: 0.38912s, train_average_time: 0.39586s);(grad_norm_mean: nan, grad_norm_step: 8.13984) +2023-10-25 19:40:08 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3620, lr:7.449572e-06, step_mean_loss:0.0012769444147124887, average_loss:0.06366979909288858), time, (train_step_time: 0.43225s, train_average_time: 0.39595s);(grad_norm_mean: nan, grad_norm_step: 0.26858) +2023-10-25 19:40:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3640, lr:7.336403e-06, step_mean_loss:0.010973346419632435, average_loss:0.06338942383502819), time, (train_step_time: 0.44676s, train_average_time: 0.39605s);(grad_norm_mean: nan, grad_norm_step: 5.20197) +2023-10-25 19:40:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3660, lr:7.223600e-06, step_mean_loss:0.0007468066178262234, average_loss:0.06306797005679614), time, (train_step_time: 0.42170s, train_average_time: 0.39613s);(grad_norm_mean: nan, grad_norm_step: 0.07469) +2023-10-25 19:40:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3680, lr:7.111178e-06, step_mean_loss:0.005027547478675842, average_loss:0.06276194053052825), time, (train_step_time: 0.47061s, train_average_time: 0.39617s);(grad_norm_mean: nan, grad_norm_step: 0.42092) +2023-10-25 19:40:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3700, lr:6.999154e-06, step_mean_loss:0.005346422549337149, average_loss:0.06244696322073552), time, (train_step_time: 0.37424s, train_average_time: 0.39614s);(grad_norm_mean: nan, grad_norm_step: 0.87140) +2023-10-25 19:40:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3720, lr:6.887541e-06, step_mean_loss:0.004987229127436876, average_loss:0.06214980571679555), time, (train_step_time: 0.39029s, train_average_time: 0.39617s);(grad_norm_mean: nan, grad_norm_step: 0.62578) +2023-10-25 19:40:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3740, lr:6.776357e-06, step_mean_loss:0.007102384697645903, average_loss:0.06185427886799847), time, (train_step_time: 0.42023s, train_average_time: 0.39625s);(grad_norm_mean: nan, grad_norm_step: 1.44366) +2023-10-25 19:41:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3760, lr:6.665615e-06, step_mean_loss:0.0007224871078506112, average_loss:0.061599953967645), time, (train_step_time: 0.42092s, train_average_time: 0.39638s);(grad_norm_mean: nan, grad_norm_step: 0.08083) +2023-10-25 19:41:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3780, lr:6.555331e-06, step_mean_loss:0.08961474150419235, average_loss:0.06131724803910459), time, (train_step_time: 0.42388s, train_average_time: 0.39644s);(grad_norm_mean: nan, grad_norm_step: 4.14483) +2023-10-25 19:41:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3800, lr:6.445521e-06, step_mean_loss:0.000706732738763094, average_loss:0.06103542978325286), time, (train_step_time: 0.40128s, train_average_time: 0.39645s);(grad_norm_mean: nan, grad_norm_step: 0.06118) +2023-10-25 19:41:25 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 12, steps: 3809); +2023-10-25 19:41:28 INFO root donut_experiment.py:62 - batch inference time:2.478738784790039 s +2023-10-25 19:41:30 INFO root donut_experiment.py:62 - batch inference time:1.9563355445861816 s +2023-10-25 19:41:31 INFO root donut_experiment.py:62 - batch inference time:1.025892734527588 s +2023-10-25 19:41:32 INFO root donut_experiment.py:62 - batch inference time:0.7004632949829102 s +2023-10-25 19:41:34 INFO root donut_experiment.py:62 - batch inference time:1.499049186706543 s +2023-10-25 19:41:34 INFO root donut_experiment.py:62 - batch inference time:0.7894017696380615 s +2023-10-25 19:41:35 INFO root donut_experiment.py:62 - batch inference time:0.559373140335083 s +2023-10-25 19:41:36 INFO root donut_experiment.py:62 - batch inference time:0.9849748611450195 s +2023-10-25 19:41:37 INFO root donut_experiment.py:62 - batch inference time:1.053636074066162 s +2023-10-25 19:41:39 INFO root donut_experiment.py:62 - batch inference time:1.6085622310638428 s +2023-10-25 19:41:40 INFO root donut_experiment.py:62 - batch inference time:1.8647427558898926 s +2023-10-25 19:41:41 INFO root donut_experiment.py:62 - batch inference time:0.5039291381835938 s +2023-10-25 19:41:43 INFO root donut_experiment.py:62 - batch inference time:1.7018053531646729 s +2023-10-25 19:41:44 INFO root donut_experiment.py:62 - batch inference time:1.1268982887268066 s +2023-10-25 19:41:45 INFO root donut_experiment.py:62 - batch inference time:1.5532994270324707 s +2023-10-25 19:41:46 INFO root donut_experiment.py:62 - batch inference time:1.1130239963531494 s +2023-10-25 19:41:48 INFO root donut_experiment.py:62 - batch inference time:1.3437433242797852 s +2023-10-25 19:41:49 INFO root donut_experiment.py:62 - batch inference time:1.2284607887268066 s +2023-10-25 19:41:49 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:41:49 INFO root donut_experiment.py:72 - token_acc: 0.351828890266584; edit_dis: 0.10734596540960926 +2023-10-25 19:41:53 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch12_step3809_lr6.396265e-06_avg_loss0.06090_token_acc0.35183_edit_dis0.10735.pth +2023-10-25 19:41:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3820, lr:6.336200e-06, step_mean_loss:0.00013452480197884142, average_loss:0.06075638456436274), time, (train_step_time: 0.38725s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.01549) +2023-10-25 19:42:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3840, lr:6.227381e-06, step_mean_loss:0.018475480377674103, average_loss:0.06049232238112315), time, (train_step_time: 0.40489s, train_average_time: 0.39641s);(grad_norm_mean: nan, grad_norm_step: 1.80757) +2023-10-25 19:42:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3860, lr:6.119081e-06, step_mean_loss:0.0010949454735964537, average_loss:0.06019333423697399), time, (train_step_time: 0.38515s, train_average_time: 0.39641s);(grad_norm_mean: nan, grad_norm_step: 0.06758) +2023-10-25 19:42:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3880, lr:6.011315e-06, step_mean_loss:0.00023898674407973886, average_loss:0.05994410416238051), time, (train_step_time: 0.37768s, train_average_time: 0.39639s);(grad_norm_mean: nan, grad_norm_step: 0.02105) +2023-10-25 19:42:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3900, lr:5.904096e-06, step_mean_loss:0.003181066829711199, average_loss:0.05967338186791937), time, (train_step_time: 0.38475s, train_average_time: 0.39640s);(grad_norm_mean: nan, grad_norm_step: 0.39210) +2023-10-25 19:42:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3920, lr:5.797441e-06, step_mean_loss:0.0012872738298028708, average_loss:0.05941129771983924), time, (train_step_time: 0.42961s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.16731) +2023-10-25 19:42:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3940, lr:5.691363e-06, step_mean_loss:0.002127988263964653, average_loss:0.0591469240904166), time, (train_step_time: 0.39408s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.61815) +2023-10-25 19:42:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3960, lr:5.585876e-06, step_mean_loss:0.0016165077686309814, average_loss:0.05888561956265019), time, (train_step_time: 0.41335s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.15463) +2023-10-25 19:43:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3980, lr:5.480997e-06, step_mean_loss:0.0003294682828709483, average_loss:0.05861440301967642), time, (train_step_time: 0.37636s, train_average_time: 0.39652s);(grad_norm_mean: nan, grad_norm_step: 0.03711) +2023-10-25 19:43:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4000, lr:5.376738e-06, step_mean_loss:0.0012476957635954022, average_loss:0.0583678155427574), time, (train_step_time: 0.38401s, train_average_time: 0.39652s);(grad_norm_mean: nan, grad_norm_step: 0.44658) +2023-10-25 19:43:10 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 13, steps: 4000); +2023-10-25 19:43:13 INFO root donut_experiment.py:62 - batch inference time:2.5147736072540283 s +2023-10-25 19:43:15 INFO root donut_experiment.py:62 - batch inference time:1.8776042461395264 s +2023-10-25 19:43:16 INFO root donut_experiment.py:62 - batch inference time:1.0969276428222656 s +2023-10-25 19:43:17 INFO root donut_experiment.py:62 - batch inference time:0.7131152153015137 s +2023-10-25 19:43:18 INFO root donut_experiment.py:62 - batch inference time:1.505880355834961 s +2023-10-25 19:43:19 INFO root donut_experiment.py:62 - batch inference time:0.8070721626281738 s +2023-10-25 19:43:20 INFO root donut_experiment.py:62 - batch inference time:0.5651617050170898 s +2023-10-25 19:43:21 INFO root donut_experiment.py:62 - batch inference time:0.9973888397216797 s +2023-10-25 19:43:22 INFO root donut_experiment.py:62 - batch inference time:1.095160961151123 s +2023-10-25 19:43:23 INFO root donut_experiment.py:62 - batch inference time:1.5884554386138916 s +2023-10-25 19:43:25 INFO root donut_experiment.py:62 - batch inference time:1.4097156524658203 s +2023-10-25 19:43:25 INFO root donut_experiment.py:62 - batch inference time:0.4966309070587158 s +2023-10-25 19:43:27 INFO root donut_experiment.py:62 - batch inference time:1.4602560997009277 s +2023-10-25 19:43:28 INFO root donut_experiment.py:62 - batch inference time:1.128617763519287 s +2023-10-25 19:43:29 INFO root donut_experiment.py:62 - batch inference time:1.48374342918396 s +2023-10-25 19:43:30 INFO root donut_experiment.py:62 - batch inference time:1.0303125381469727 s +2023-10-25 19:43:32 INFO root donut_experiment.py:62 - batch inference time:1.3496859073638916 s +2023-10-25 19:43:33 INFO root donut_experiment.py:62 - batch inference time:1.2387645244598389 s +2023-10-25 19:43:33 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:43:33 INFO root donut_experiment.py:72 - token_acc: 0.3845911949685535; edit_dis: 0.10550190842933725 +2023-10-25 19:43:38 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch13_step4000_lr5.376738e-06_avg_loss0.05837_token_acc0.38459_edit_dis0.10550.pth +2023-10-25 19:43:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4020, lr:5.273115e-06, step_mean_loss:0.0003306316211819649, average_loss:0.05810135628369345), time, (train_step_time: 0.37975s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.02864) +2023-10-25 19:43:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4040, lr:5.170141e-06, step_mean_loss:0.0001423735957359895, average_loss:0.05784984258402351), time, (train_step_time: 0.39840s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.01087) +2023-10-25 19:44:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4060, lr:5.067831e-06, step_mean_loss:0.010565412230789661, average_loss:0.0575751774294605), time, (train_step_time: 0.38882s, train_average_time: 0.39653s);(grad_norm_mean: nan, grad_norm_step: 1.29564) +2023-10-25 19:44:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4080, lr:4.966199e-06, step_mean_loss:0.0012701294617727399, average_loss:0.057324040626506347), time, (train_step_time: 0.37716s, train_average_time: 0.39656s);(grad_norm_mean: nan, grad_norm_step: 0.15283) +2023-10-25 19:44:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4100, lr:4.865258e-06, step_mean_loss:0.0002577654959168285, average_loss:0.057080157251067895), time, (train_step_time: 0.37384s, train_average_time: 0.39642s);(grad_norm_mean: nan, grad_norm_step: 0.02406) +2023-10-25 19:44:18 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 13, steps: 4102); +2023-10-25 19:44:21 INFO root donut_experiment.py:62 - batch inference time:2.3946921825408936 s +2023-10-25 19:44:23 INFO root donut_experiment.py:62 - batch inference time:1.8664326667785645 s +2023-10-25 19:44:24 INFO root donut_experiment.py:62 - batch inference time:1.0747439861297607 s +2023-10-25 19:44:24 INFO root donut_experiment.py:62 - batch inference time:0.6985528469085693 s +2023-10-25 19:44:26 INFO root donut_experiment.py:62 - batch inference time:1.4699797630310059 s +2023-10-25 19:44:27 INFO root donut_experiment.py:62 - batch inference time:0.787299633026123 s +2023-10-25 19:44:27 INFO root donut_experiment.py:62 - batch inference time:0.5530200004577637 s +2023-10-25 19:44:28 INFO root donut_experiment.py:62 - batch inference time:0.9359838962554932 s +2023-10-25 19:44:29 INFO root donut_experiment.py:62 - batch inference time:1.06557035446167 s +2023-10-25 19:44:31 INFO root donut_experiment.py:62 - batch inference time:1.5738677978515625 s +2023-10-25 19:44:33 INFO root donut_experiment.py:62 - batch inference time:1.8959510326385498 s +2023-10-25 19:44:33 INFO root donut_experiment.py:62 - batch inference time:0.44247913360595703 s +2023-10-25 19:44:35 INFO root donut_experiment.py:62 - batch inference time:1.430361270904541 s +2023-10-25 19:44:36 INFO root donut_experiment.py:62 - batch inference time:1.2087669372558594 s +2023-10-25 19:44:37 INFO root donut_experiment.py:62 - batch inference time:1.3960893154144287 s +2023-10-25 19:44:38 INFO root donut_experiment.py:62 - batch inference time:1.0433728694915771 s +2023-10-25 19:44:40 INFO root donut_experiment.py:62 - batch inference time:1.327226161956787 s +2023-10-25 19:44:41 INFO root donut_experiment.py:62 - batch inference time:1.2117187976837158 s +2023-10-25 19:44:41 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:44:41 INFO root donut_experiment.py:72 - token_acc: 0.31334981458590855; edit_dis: 0.1234776628467832 +2023-10-25 19:44:45 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch13_step4102_lr4.855203e-06_avg_loss0.05705_token_acc0.31335_edit_dis0.12348.pth +2023-10-25 19:44:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4120, lr:4.765023e-06, step_mean_loss:0.0007352510583586991, average_loss:0.05683690042938438), time, (train_step_time: 0.37874s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.18530) +2023-10-25 19:45:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4140, lr:4.665507e-06, step_mean_loss:0.00011260010796831921, average_loss:0.05660157920572531), time, (train_step_time: 0.38785s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.00643) +2023-10-25 19:45:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4160, lr:4.566725e-06, step_mean_loss:0.0004615172219928354, average_loss:0.0564827191606989), time, (train_step_time: 0.39534s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.03369) +2023-10-25 19:45:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4180, lr:4.468688e-06, step_mean_loss:0.0020180032588541508, average_loss:0.05622651250561796), time, (train_step_time: 0.37901s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.32390) +2023-10-25 19:45:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4200, lr:4.371412e-06, step_mean_loss:0.019141802564263344, average_loss:0.05598556350116572), time, (train_step_time: 0.38134s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 3.47704) +2023-10-25 19:45:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4220, lr:4.274910e-06, step_mean_loss:0.021643033251166344, average_loss:0.05575845884304339), time, (train_step_time: 0.40765s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 2.14456) +2023-10-25 19:45:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4240, lr:4.179194e-06, step_mean_loss:0.000424734695116058, average_loss:0.055516491147609745), time, (train_step_time: 0.38699s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.03133) +2023-10-25 19:45:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4260, lr:4.084278e-06, step_mean_loss:0.00018400615954305977, average_loss:0.055271572310608516), time, (train_step_time: 0.40026s, train_average_time: 0.39645s);(grad_norm_mean: nan, grad_norm_step: 0.03059) +2023-10-25 19:45:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4280, lr:3.990175e-06, step_mean_loss:0.029902899637818336, average_loss:0.05503334752513787), time, (train_step_time: 0.41044s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 2.43069) +2023-10-25 19:46:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4300, lr:3.896897e-06, step_mean_loss:0.0015431438805535436, average_loss:0.05479259927051359), time, (train_step_time: 0.40290s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.16681) +2023-10-25 19:46:12 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4320, lr:3.804459e-06, step_mean_loss:0.00028538404149003327, average_loss:0.0545603955163057), time, (train_step_time: 0.39994s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.05183) +2023-10-25 19:46:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4340, lr:3.712871e-06, step_mean_loss:0.0022390747908502817, average_loss:0.054329466068953516), time, (train_step_time: 0.44198s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 0.42088) +2023-10-25 19:46:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4360, lr:3.622148e-06, step_mean_loss:0.006152530200779438, average_loss:0.05410501550400364), time, (train_step_time: 0.43063s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 0.32852) +2023-10-25 19:46:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4380, lr:3.532301e-06, step_mean_loss:7.943659875309095e-05, average_loss:0.053870183537720676), time, (train_step_time: 0.37110s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.00557) +2023-10-25 19:46:42 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 14, steps: 4395); +2023-10-25 19:46:45 INFO root donut_experiment.py:62 - batch inference time:2.537189245223999 s +2023-10-25 19:46:47 INFO root donut_experiment.py:62 - batch inference time:1.974769115447998 s +2023-10-25 19:46:48 INFO root donut_experiment.py:62 - batch inference time:1.0980021953582764 s +2023-10-25 19:46:49 INFO root donut_experiment.py:62 - batch inference time:0.8026368618011475 s +2023-10-25 19:46:51 INFO root donut_experiment.py:62 - batch inference time:1.4991371631622314 s +2023-10-25 19:46:51 INFO root donut_experiment.py:62 - batch inference time:0.7867968082427979 s +2023-10-25 19:46:52 INFO root donut_experiment.py:62 - batch inference time:0.5605819225311279 s +2023-10-25 19:46:53 INFO root donut_experiment.py:62 - batch inference time:0.9946720600128174 s +2023-10-25 19:46:54 INFO root donut_experiment.py:62 - batch inference time:1.0534977912902832 s +2023-10-25 19:46:56 INFO root donut_experiment.py:62 - batch inference time:1.596160650253296 s +2023-10-25 19:46:57 INFO root donut_experiment.py:62 - batch inference time:1.7068195343017578 s +2023-10-25 19:46:58 INFO root donut_experiment.py:62 - batch inference time:0.49338412284851074 s +2023-10-25 19:46:59 INFO root donut_experiment.py:62 - batch inference time:1.496187686920166 s +2023-10-25 19:47:00 INFO root donut_experiment.py:62 - batch inference time:1.2050070762634277 s +2023-10-25 19:47:02 INFO root donut_experiment.py:62 - batch inference time:1.5088047981262207 s +2023-10-25 19:47:03 INFO root donut_experiment.py:62 - batch inference time:1.1172301769256592 s +2023-10-25 19:47:04 INFO root donut_experiment.py:62 - batch inference time:1.346877098083496 s +2023-10-25 19:47:06 INFO root donut_experiment.py:62 - batch inference time:1.2340705394744873 s +2023-10-25 19:47:06 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:47:06 INFO root donut_experiment.py:72 - token_acc: 0.3073821339950372; edit_dis: 0.10996518186538701 +2023-10-25 19:47:11 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch14_step4395_lr3.465498e-06_avg_loss0.05371_token_acc0.30738_edit_dis0.10997.pth +2023-10-25 19:47:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4400, lr:3.443343e-06, step_mean_loss:0.0014118760591372848, average_loss:0.05365727637699613), time, (train_step_time: 0.39004s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.39093) +2023-10-25 19:47:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4420, lr:3.355285e-06, step_mean_loss:0.001419189153239131, average_loss:0.05344266752584712), time, (train_step_time: 0.37580s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.16303) +2023-10-25 19:47:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4440, lr:3.268141e-06, step_mean_loss:0.003947882913053036, average_loss:0.05323351580599559), time, (train_step_time: 0.43096s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.84222) +2023-10-25 19:47:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4460, lr:3.181922e-06, step_mean_loss:0.0005250814720056951, average_loss:0.053006305543042684), time, (train_step_time: 0.37620s, train_average_time: 0.39644s);(grad_norm_mean: nan, grad_norm_step: 0.18035) +2023-10-25 19:47:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4480, lr:3.096639e-06, step_mean_loss:0.000825975148472935, average_loss:0.05278219759382239), time, (train_step_time: 0.39170s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.08894) +2023-10-25 19:47:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4500, lr:3.012306e-06, step_mean_loss:0.00044732363312505186, average_loss:0.052582221401027106), time, (train_step_time: 0.41658s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.11528) +2023-10-25 19:48:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4520, lr:2.928932e-06, step_mean_loss:0.0036689441185444593, average_loss:0.05235806489381784), time, (train_step_time: 0.37908s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.64667) +2023-10-25 19:48:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4540, lr:2.846530e-06, step_mean_loss:0.0003636888286564499, average_loss:0.05216465195657421), time, (train_step_time: 0.38917s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.09582) +2023-10-25 19:48:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4560, lr:2.765111e-06, step_mean_loss:2.399238474026788e-05, average_loss:0.05195665997058441), time, (train_step_time: 0.39749s, train_average_time: 0.39653s);(grad_norm_mean: nan, grad_norm_step: 0.00192) +2023-10-25 19:48:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4580, lr:2.684687e-06, step_mean_loss:0.003223925596103072, average_loss:0.051756347563680694), time, (train_step_time: 0.48928s, train_average_time: 0.39659s);(grad_norm_mean: nan, grad_norm_step: 0.43751) +2023-10-25 19:48:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4600, lr:2.605267e-06, step_mean_loss:0.0015589683316648006, average_loss:0.05157736003313956), time, (train_step_time: 0.39667s, train_average_time: 0.39662s);(grad_norm_mean: nan, grad_norm_step: 0.15035) +2023-10-25 19:48:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4620, lr:2.526864e-06, step_mean_loss:0.0752008855342865, average_loss:0.05138239818444513), time, (train_step_time: 0.37097s, train_average_time: 0.39658s);(grad_norm_mean: nan, grad_norm_step: 7.70569) +2023-10-25 19:48:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4640, lr:2.449487e-06, step_mean_loss:0.004956886172294617, average_loss:0.05122780727023116), time, (train_step_time: 0.36888s, train_average_time: 0.39656s);(grad_norm_mean: nan, grad_norm_step: 1.75283) +2023-10-25 19:48:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4660, lr:2.373148e-06, step_mean_loss:0.00014676836144644767, average_loss:0.05105030034143454), time, (train_step_time: 0.40095s, train_average_time: 0.39654s);(grad_norm_mean: nan, grad_norm_step: 0.01186) +2023-10-25 19:49:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4680, lr:2.297857e-06, step_mean_loss:0.010765302926301956, average_loss:0.05084637328881376), time, (train_step_time: 0.36062s, train_average_time: 0.39644s);(grad_norm_mean: nan, grad_norm_step: 2.51534) +2023-10-25 19:49:08 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 15, steps: 4688); +2023-10-25 19:49:11 INFO root donut_experiment.py:62 - batch inference time:2.493770122528076 s +2023-10-25 19:49:12 INFO root donut_experiment.py:62 - batch inference time:1.8840467929840088 s +2023-10-25 19:49:14 INFO root donut_experiment.py:62 - batch inference time:1.1003377437591553 s +2023-10-25 19:49:14 INFO root donut_experiment.py:62 - batch inference time:0.7151138782501221 s +2023-10-25 19:49:16 INFO root donut_experiment.py:62 - batch inference time:1.5101916790008545 s +2023-10-25 19:49:17 INFO root donut_experiment.py:62 - batch inference time:0.8452785015106201 s +2023-10-25 19:49:17 INFO root donut_experiment.py:62 - batch inference time:0.5703427791595459 s +2023-10-25 19:49:19 INFO root donut_experiment.py:62 - batch inference time:1.2561852931976318 s +2023-10-25 19:49:20 INFO root donut_experiment.py:62 - batch inference time:1.3364899158477783 s +2023-10-25 19:49:22 INFO root donut_experiment.py:62 - batch inference time:2.0323550701141357 s +2023-10-25 19:49:24 INFO root donut_experiment.py:62 - batch inference time:2.306330442428589 s +2023-10-25 19:49:25 INFO root donut_experiment.py:62 - batch inference time:0.6272609233856201 s +2023-10-25 19:49:27 INFO root donut_experiment.py:62 - batch inference time:1.8762495517730713 s +2023-10-25 19:49:28 INFO root donut_experiment.py:62 - batch inference time:1.227034568786621 s +2023-10-25 19:49:29 INFO root donut_experiment.py:62 - batch inference time:1.4490361213684082 s +2023-10-25 19:49:31 INFO root donut_experiment.py:62 - batch inference time:1.1216046810150146 s +2023-10-25 19:49:32 INFO root donut_experiment.py:62 - batch inference time:1.3527262210845947 s +2023-10-25 19:49:33 INFO root donut_experiment.py:62 - batch inference time:1.2685906887054443 s +2023-10-25 19:49:33 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:49:33 INFO root donut_experiment.py:72 - token_acc: 0.3788067122436296; edit_dis: 0.10355720694765749 +2023-10-25 19:49:38 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch15_step4688_lr2.268037e-06_avg_loss0.05076_token_acc0.37881_edit_dis0.10356.pth +2023-10-25 19:49:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4700, lr:2.223625e-06, step_mean_loss:0.0003635183093138039, average_loss:0.050644448051061774), time, (train_step_time: 0.39211s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.05729) +2023-10-25 19:49:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4720, lr:2.150461e-06, step_mean_loss:0.0006137699820101261, average_loss:0.050475826613777273), time, (train_step_time: 0.38670s, train_average_time: 0.39642s);(grad_norm_mean: nan, grad_norm_step: 0.04927) +2023-10-25 19:49:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4740, lr:2.078375e-06, step_mean_loss:0.003044706303626299, average_loss:0.05026668132567876), time, (train_step_time: 0.43701s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.71637) +2023-10-25 19:50:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4760, lr:2.007378e-06, step_mean_loss:0.0008453342597931623, average_loss:0.05006863652859832), time, (train_step_time: 0.39119s, train_average_time: 0.39645s);(grad_norm_mean: nan, grad_norm_step: 0.08601) +2023-10-25 19:50:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4780, lr:1.937480e-06, step_mean_loss:0.02458561211824417, average_loss:0.0498944197999267), time, (train_step_time: 0.38912s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 4.16194) +2023-10-25 19:50:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4800, lr:1.868689e-06, step_mean_loss:0.02027171105146408, average_loss:0.049708499876493686), time, (train_step_time: 0.41708s, train_average_time: 0.39653s);(grad_norm_mean: nan, grad_norm_step: 1.30535) +2023-10-25 19:50:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4820, lr:1.801016e-06, step_mean_loss:0.00014269012899603695, average_loss:0.04950705324379186), time, (train_step_time: 0.42983s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.03294) +2023-10-25 19:50:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4840, lr:1.734469e-06, step_mean_loss:0.0006731321336701512, average_loss:0.04931444777965165), time, (train_step_time: 0.37902s, train_average_time: 0.39653s);(grad_norm_mean: nan, grad_norm_step: 0.06576) +2023-10-25 19:50:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4860, lr:1.669058e-06, step_mean_loss:0.00220434064976871, average_loss:0.04912161985872302), time, (train_step_time: 0.43436s, train_average_time: 0.39656s);(grad_norm_mean: nan, grad_norm_step: 0.34521) +2023-10-25 19:50:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4880, lr:1.604792e-06, step_mean_loss:0.00014995638048276305, average_loss:0.04894968160781557), time, (train_step_time: 0.44894s, train_average_time: 0.39658s);(grad_norm_mean: nan, grad_norm_step: 0.01189) +2023-10-25 19:51:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4900, lr:1.541679e-06, step_mean_loss:0.0017024795524775982, average_loss:0.048762553196349326), time, (train_step_time: 0.40139s, train_average_time: 0.39661s);(grad_norm_mean: nan, grad_norm_step: 0.23221) +2023-10-25 19:51:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4920, lr:1.479729e-06, step_mean_loss:0.0004977720673196018, average_loss:0.048570599140860184), time, (train_step_time: 0.37673s, train_average_time: 0.39660s);(grad_norm_mean: nan, grad_norm_step: 0.21117) +2023-10-25 19:51:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4940, lr:1.418950e-06, step_mean_loss:0.0062771150842309, average_loss:0.04838312921483336), time, (train_step_time: 0.38920s, train_average_time: 0.39658s);(grad_norm_mean: nan, grad_norm_step: 1.42831) +2023-10-25 19:51:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4960, lr:1.359349e-06, step_mean_loss:0.00016978733765427023, average_loss:0.04820029984149744), time, (train_step_time: 0.39191s, train_average_time: 0.39657s);(grad_norm_mean: nan, grad_norm_step: 0.01483) +2023-10-25 19:51:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4980, lr:1.300936e-06, step_mean_loss:9.736319043440744e-05, average_loss:0.048021972856227384), time, (train_step_time: 0.35195s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.00849) +2023-10-25 19:51:35 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 16, steps: 4981); +2023-10-25 19:51:37 INFO root donut_experiment.py:62 - batch inference time:1.4026503562927246 s +2023-10-25 19:51:39 INFO root donut_experiment.py:62 - batch inference time:1.8725228309631348 s +2023-10-25 19:51:40 INFO root donut_experiment.py:62 - batch inference time:1.0816400051116943 s +2023-10-25 19:51:41 INFO root donut_experiment.py:62 - batch inference time:0.699242115020752 s +2023-10-25 19:51:42 INFO root donut_experiment.py:62 - batch inference time:1.4760985374450684 s +2023-10-25 19:51:43 INFO root donut_experiment.py:62 - batch inference time:0.7852745056152344 s +2023-10-25 19:51:43 INFO root donut_experiment.py:62 - batch inference time:0.5514059066772461 s +2023-10-25 19:51:44 INFO root donut_experiment.py:62 - batch inference time:0.9813187122344971 s +2023-10-25 19:51:45 INFO root donut_experiment.py:62 - batch inference time:1.035710334777832 s +2023-10-25 19:51:47 INFO root donut_experiment.py:62 - batch inference time:1.5668859481811523 s +2023-10-25 19:51:49 INFO root donut_experiment.py:62 - batch inference time:1.778329610824585 s +2023-10-25 19:51:49 INFO root donut_experiment.py:62 - batch inference time:0.48743510246276855 s +2023-10-25 19:51:51 INFO root donut_experiment.py:62 - batch inference time:1.43516206741333 s +2023-10-25 19:51:52 INFO root donut_experiment.py:62 - batch inference time:1.1142148971557617 s +2023-10-25 19:51:53 INFO root donut_experiment.py:62 - batch inference time:1.4253430366516113 s +2023-10-25 19:51:54 INFO root donut_experiment.py:62 - batch inference time:1.079939365386963 s +2023-10-25 19:51:56 INFO root donut_experiment.py:62 - batch inference time:1.3251893520355225 s +2023-10-25 19:51:57 INFO root donut_experiment.py:62 - batch inference time:1.2103734016418457 s +2023-10-25 19:51:57 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:51:57 INFO root donut_experiment.py:72 - token_acc: 0.3936369307548347; edit_dis: 0.1187495183230897 +2023-10-25 19:52:01 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch16_step4981_lr1.298047e-06_avg_loss0.04801_token_acc0.39364_edit_dis0.11875.pth +2023-10-25 19:52:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5000, lr:1.243719e-06, step_mean_loss:0.0036770787555724382, average_loss:0.04783751300787553), time, (train_step_time: 0.39072s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 1.19775) +2023-10-25 19:52:10 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 17, steps: 5000); +2023-10-25 19:52:13 INFO root donut_experiment.py:62 - batch inference time:2.482919692993164 s +2023-10-25 19:52:15 INFO root donut_experiment.py:62 - batch inference time:1.9311933517456055 s +2023-10-25 19:52:16 INFO root donut_experiment.py:62 - batch inference time:1.096761703491211 s +2023-10-25 19:52:17 INFO root donut_experiment.py:62 - batch inference time:0.777327299118042 s +2023-10-25 19:52:18 INFO root donut_experiment.py:62 - batch inference time:1.5101566314697266 s +2023-10-25 19:52:19 INFO root donut_experiment.py:62 - batch inference time:0.7975060939788818 s +2023-10-25 19:52:19 INFO root donut_experiment.py:62 - batch inference time:0.563469648361206 s +2023-10-25 19:52:20 INFO root donut_experiment.py:62 - batch inference time:0.9999315738677979 s +2023-10-25 19:52:21 INFO root donut_experiment.py:62 - batch inference time:1.0541977882385254 s +2023-10-25 19:52:23 INFO root donut_experiment.py:62 - batch inference time:1.5888779163360596 s +2023-10-25 19:52:25 INFO root donut_experiment.py:62 - batch inference time:2.051974296569824 s +2023-10-25 19:52:26 INFO root donut_experiment.py:62 - batch inference time:0.49338483810424805 s +2023-10-25 19:52:27 INFO root donut_experiment.py:62 - batch inference time:1.434523105621338 s +2023-10-25 19:52:28 INFO root donut_experiment.py:62 - batch inference time:1.2120881080627441 s +2023-10-25 19:52:30 INFO root donut_experiment.py:62 - batch inference time:1.5406208038330078 s +2023-10-25 19:52:31 INFO root donut_experiment.py:62 - batch inference time:1.1182715892791748 s +2023-10-25 19:52:32 INFO root donut_experiment.py:62 - batch inference time:1.3474931716918945 s +2023-10-25 19:52:34 INFO root donut_experiment.py:62 - batch inference time:1.2395122051239014 s +2023-10-25 19:52:34 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:52:34 INFO root donut_experiment.py:72 - token_acc: 0.3078346699568168; edit_dis: 0.11245040965795604 +2023-10-25 19:52:38 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch17_step5000_lr1.243719e-06_avg_loss0.04784_token_acc0.30783_edit_dis0.11245.pth +2023-10-25 19:52:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5020, lr:1.187704e-06, step_mean_loss:2.8147796911071055e-05, average_loss:0.047667697944121845), time, (train_step_time: 0.40211s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.00252) +2023-10-25 19:52:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5040, lr:1.132901e-06, step_mean_loss:0.0012775057693943381, average_loss:0.047495334794535625), time, (train_step_time: 0.39265s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.27956) +2023-10-25 19:53:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5060, lr:1.079316e-06, step_mean_loss:0.00028526390087790787, average_loss:0.047328977477717145), time, (train_step_time: 0.40951s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 0.03790) +2023-10-25 19:53:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5080, lr:1.026956e-06, step_mean_loss:0.0007629336905665696, average_loss:0.047146338268034174), time, (train_step_time: 0.46385s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.07108) +2023-10-25 19:53:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5100, lr:9.758300e-07, step_mean_loss:0.0016749636270105839, average_loss:0.046969043393441386), time, (train_step_time: 0.44086s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 0.43256) +2023-10-25 19:53:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5120, lr:9.259438e-07, step_mean_loss:0.00046280198148451746, average_loss:0.046795498994895365), time, (train_step_time: 0.38578s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.05338) +2023-10-25 19:53:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5140, lr:8.773045e-07, step_mean_loss:0.00029264844488352537, average_loss:0.04661778554066264), time, (train_step_time: 0.37604s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.04986) +2023-10-25 19:53:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5160, lr:8.299187e-07, step_mean_loss:0.0012475766707211733, average_loss:0.04644675144143287), time, (train_step_time: 0.39517s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.06578) +2023-10-25 19:53:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5180, lr:7.837930e-07, step_mean_loss:0.005566865671426058, average_loss:0.04628875948233232), time, (train_step_time: 0.37712s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.30021) +2023-10-25 19:53:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5200, lr:7.389337e-07, step_mean_loss:7.675201777601615e-05, average_loss:0.04612910521789008), time, (train_step_time: 0.38758s, train_average_time: 0.39651s);(grad_norm_mean: nan, grad_norm_step: 0.00640) +2023-10-25 19:54:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5220, lr:6.953470e-07, step_mean_loss:0.0020067321602255106, average_loss:0.046002470034585075), time, (train_step_time: 0.39896s, train_average_time: 0.39650s);(grad_norm_mean: nan, grad_norm_step: 0.22645) +2023-10-25 19:54:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5240, lr:6.530389e-07, step_mean_loss:6.336159276543185e-05, average_loss:0.04585037103477291), time, (train_step_time: 0.42955s, train_average_time: 0.39652s);(grad_norm_mean: nan, grad_norm_step: 0.00465) +2023-10-25 19:54:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5260, lr:6.120152e-07, step_mean_loss:0.00010786287020891905, average_loss:0.04570654415931698), time, (train_step_time: 0.38483s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 0.00984) +2023-10-25 19:54:26 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 17, steps: 5274); +2023-10-25 19:54:29 INFO root donut_experiment.py:62 - batch inference time:2.539360761642456 s +2023-10-25 19:54:31 INFO root donut_experiment.py:62 - batch inference time:1.951282262802124 s +2023-10-25 19:54:33 INFO root donut_experiment.py:62 - batch inference time:1.0991849899291992 s +2023-10-25 19:54:33 INFO root donut_experiment.py:62 - batch inference time:0.712167501449585 s +2023-10-25 19:54:35 INFO root donut_experiment.py:62 - batch inference time:1.5048770904541016 s +2023-10-25 19:54:36 INFO root donut_experiment.py:62 - batch inference time:0.8517146110534668 s +2023-10-25 19:54:36 INFO root donut_experiment.py:62 - batch inference time:0.5960440635681152 s +2023-10-25 19:54:37 INFO root donut_experiment.py:62 - batch inference time:0.9758446216583252 s +2023-10-25 19:54:38 INFO root donut_experiment.py:62 - batch inference time:1.054525375366211 s +2023-10-25 19:54:40 INFO root donut_experiment.py:62 - batch inference time:1.5991253852844238 s +2023-10-25 19:54:42 INFO root donut_experiment.py:62 - batch inference time:1.688293218612671 s +2023-10-25 19:54:42 INFO root donut_experiment.py:62 - batch inference time:0.49442410469055176 s +2023-10-25 19:54:44 INFO root donut_experiment.py:62 - batch inference time:1.5048213005065918 s +2023-10-25 19:54:45 INFO root donut_experiment.py:62 - batch inference time:1.0557315349578857 s +2023-10-25 19:54:46 INFO root donut_experiment.py:62 - batch inference time:1.5249409675598145 s +2023-10-25 19:54:47 INFO root donut_experiment.py:62 - batch inference time:1.0776448249816895 s +2023-10-25 19:54:49 INFO root donut_experiment.py:62 - batch inference time:1.312307596206665 s +2023-10-25 19:54:50 INFO root donut_experiment.py:62 - batch inference time:1.2311203479766846 s +2023-10-25 19:54:50 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:54:50 INFO root donut_experiment.py:72 - token_acc: 0.31259720062208396; edit_dis: 0.10704811180391086 +2023-10-25 19:54:54 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch17_step5274_lr5.840658e-07_avg_loss0.04561_token_acc0.31260_edit_dis0.10705.pth +2023-10-25 19:54:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5280, lr:5.722815e-07, step_mean_loss:3.59598889190238e-05, average_loss:0.04557042289791539), time, (train_step_time: 0.40073s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.00443) +2023-10-25 19:55:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5300, lr:5.338432e-07, step_mean_loss:0.0002212553663412109, average_loss:0.0454071647809717), time, (train_step_time: 0.38884s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.02708) +2023-10-25 19:55:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5320, lr:4.967058e-07, step_mean_loss:0.001228508772328496, average_loss:0.04525046376749325), time, (train_step_time: 0.38124s, train_average_time: 0.39643s);(grad_norm_mean: nan, grad_norm_step: 0.14751) +2023-10-25 19:55:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5340, lr:4.608742e-07, step_mean_loss:0.0010440467158332467, average_loss:0.04509116853135682), time, (train_step_time: 0.39456s, train_average_time: 0.39644s);(grad_norm_mean: nan, grad_norm_step: 0.10915) +2023-10-25 19:55:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5360, lr:4.263534e-07, step_mean_loss:0.00012315371714066714, average_loss:0.04493494443839514), time, (train_step_time: 0.39884s, train_average_time: 0.39645s);(grad_norm_mean: nan, grad_norm_step: 0.00986) +2023-10-25 19:55:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5380, lr:3.931481e-07, step_mean_loss:0.00024038688570726663, average_loss:0.044788627979872535), time, (train_step_time: 0.38922s, train_average_time: 0.39646s);(grad_norm_mean: nan, grad_norm_step: 0.02208) +2023-10-25 19:55:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5400, lr:3.612630e-07, step_mean_loss:0.007675353437662125, average_loss:0.044634291924375145), time, (train_step_time: 0.39605s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 1.27931) +2023-10-25 19:55:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5420, lr:3.307023e-07, step_mean_loss:0.02160651423037052, average_loss:0.04450659395005328), time, (train_step_time: 0.42381s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 2.35222) +2023-10-25 19:56:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5440, lr:3.014703e-07, step_mean_loss:0.0037127279210835695, average_loss:0.044361258115222145), time, (train_step_time: 0.39498s, train_average_time: 0.39648s);(grad_norm_mean: nan, grad_norm_step: 0.88427) +2023-10-25 19:56:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5460, lr:2.735709e-07, step_mean_loss:0.00014359023771248758, average_loss:0.04420302474006911), time, (train_step_time: 0.39855s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 0.02136) +2023-10-25 19:56:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5480, lr:2.470082e-07, step_mean_loss:0.0005206987261772156, average_loss:0.044054307506040745), time, (train_step_time: 0.38436s, train_average_time: 0.39649s);(grad_norm_mean: nan, grad_norm_step: 0.09076) +2023-10-25 19:56:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5500, lr:2.217856e-07, step_mean_loss:0.01638559252023697, average_loss:0.043902734679113396), time, (train_step_time: 0.40095s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 2.26963) +2023-10-25 19:56:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5520, lr:1.979066e-07, step_mean_loss:8.909235475584865e-05, average_loss:0.04377471871164289), time, (train_step_time: 0.39502s, train_average_time: 0.39645s);(grad_norm_mean: nan, grad_norm_step: 0.00925) +2023-10-25 19:56:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5540, lr:1.753746e-07, step_mean_loss:0.01148831658065319, average_loss:0.04363084012200107), time, (train_step_time: 0.37883s, train_average_time: 0.39647s);(grad_norm_mean: nan, grad_norm_step: 3.00275) +2023-10-25 19:56:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5560, lr:1.541926e-07, step_mean_loss:0.0005817795754410326, average_loss:0.04347923258929415), time, (train_step_time: 0.37104s, train_average_time: 0.39641s);(grad_norm_mean: nan, grad_norm_step: 0.09010) +2023-10-25 19:56:51 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 18, steps: 5567); +2023-10-25 19:56:54 INFO root donut_experiment.py:62 - batch inference time:2.464193344116211 s +2023-10-25 19:56:56 INFO root donut_experiment.py:62 - batch inference time:1.875208854675293 s +2023-10-25 19:56:57 INFO root donut_experiment.py:62 - batch inference time:1.0912480354309082 s +2023-10-25 19:56:58 INFO root donut_experiment.py:62 - batch inference time:0.6974284648895264 s +2023-10-25 19:56:59 INFO root donut_experiment.py:62 - batch inference time:1.4942657947540283 s +2023-10-25 19:57:00 INFO root donut_experiment.py:62 - batch inference time:0.7901482582092285 s +2023-10-25 19:57:00 INFO root donut_experiment.py:62 - batch inference time:0.5620331764221191 s +2023-10-25 19:57:01 INFO root donut_experiment.py:62 - batch inference time:0.9925305843353271 s +2023-10-25 19:57:02 INFO root donut_experiment.py:62 - batch inference time:1.049485445022583 s +2023-10-25 19:57:04 INFO root donut_experiment.py:62 - batch inference time:1.5903279781341553 s +2023-10-25 19:57:05 INFO root donut_experiment.py:62 - batch inference time:1.3917295932769775 s +2023-10-25 19:57:06 INFO root donut_experiment.py:62 - batch inference time:0.46935224533081055 s +2023-10-25 19:57:07 INFO root donut_experiment.py:62 - batch inference time:1.488633394241333 s +2023-10-25 19:57:09 INFO root donut_experiment.py:62 - batch inference time:1.1575596332550049 s +2023-10-25 19:57:10 INFO root donut_experiment.py:62 - batch inference time:1.530019760131836 s +2023-10-25 19:57:11 INFO root donut_experiment.py:62 - batch inference time:1.1009304523468018 s +2023-10-25 19:57:13 INFO root donut_experiment.py:62 - batch inference time:1.350135087966919 s +2023-10-25 19:57:14 INFO root donut_experiment.py:62 - batch inference time:1.2337265014648438 s +2023-10-25 19:57:14 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:57:14 INFO root donut_experiment.py:72 - token_acc: 0.3895001571832757; edit_dis: 0.10236144830965992 +2023-10-25 19:57:18 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch18_step5567_lr1.470983e-07_avg_loss0.04344_token_acc0.38950_edit_dis0.10236.pth +2023-10-25 19:57:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5580, lr:1.343635e-07, step_mean_loss:0.0010204362915828824, average_loss:0.04334297956341135), time, (train_step_time: 0.38715s, train_average_time: 0.39640s);(grad_norm_mean: nan, grad_norm_step: 0.11099) +2023-10-25 19:57:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5600, lr:1.158901e-07, step_mean_loss:0.0013506036484614015, average_loss:0.0432044550404651), time, (train_step_time: 0.38427s, train_average_time: 0.39637s);(grad_norm_mean: nan, grad_norm_step: 0.20335) +2023-10-25 19:57:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5620, lr:9.877485e-08, step_mean_loss:0.0009070456144399941, average_loss:0.04305729797347712), time, (train_step_time: 0.40467s, train_average_time: 0.39633s);(grad_norm_mean: nan, grad_norm_step: 0.07661) +2023-10-25 19:57:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5640, lr:8.302018e-08, step_mean_loss:0.0012817034730687737, average_loss:0.04291294370189765), time, (train_step_time: 0.42740s, train_average_time: 0.39633s);(grad_norm_mean: nan, grad_norm_step: 0.15482) +2023-10-25 19:57:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5660, lr:6.862823e-08, step_mean_loss:0.0015525614144280553, average_loss:0.04277105200413811), time, (train_step_time: 0.37781s, train_average_time: 0.39633s);(grad_norm_mean: nan, grad_norm_step: 0.21999) +2023-10-25 19:58:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5680, lr:5.560096e-08, step_mean_loss:0.002631383016705513, average_loss:0.042633164467262986), time, (train_step_time: 0.38449s, train_average_time: 0.39633s);(grad_norm_mean: nan, grad_norm_step: 0.31951) +2023-10-25 19:58:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5700, lr:4.394018e-08, step_mean_loss:0.00020690736710093915, average_loss:0.04248893624059795), time, (train_step_time: 0.38912s, train_average_time: 0.39634s);(grad_norm_mean: nan, grad_norm_step: 0.01350) +2023-10-25 19:58:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5720, lr:3.364748e-08, step_mean_loss:0.013456171378493309, average_loss:0.04234940965642146), time, (train_step_time: 0.39786s, train_average_time: 0.39637s);(grad_norm_mean: nan, grad_norm_step: 1.84643) +2023-10-25 19:58:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5740, lr:2.472428e-08, step_mean_loss:0.009396188892424107, average_loss:0.042206981528841885), time, (train_step_time: 0.39323s, train_average_time: 0.39637s);(grad_norm_mean: nan, grad_norm_step: 2.10150) +2023-10-25 19:58:35 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5760, lr:1.717180e-08, step_mean_loss:0.00011443261610111222, average_loss:0.04207578449152657), time, (train_step_time: 0.39221s, train_average_time: 0.39637s);(grad_norm_mean: nan, grad_norm_step: 0.00811) +2023-10-25 19:58:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5780, lr:1.099109e-08, step_mean_loss:5.858242366230115e-05, average_loss:0.04193691253576711), time, (train_step_time: 0.38211s, train_average_time: 0.39640s);(grad_norm_mean: nan, grad_norm_step: 0.01044) +2023-10-25 19:58:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5800, lr:6.182981e-09, step_mean_loss:0.00022032405831851065, average_loss:0.04183941520384467), time, (train_step_time: 0.37611s, train_average_time: 0.39639s);(grad_norm_mean: nan, grad_norm_step: 0.05903) +2023-10-25 19:58:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5820, lr:2.748149e-09, step_mean_loss:3.76311618310865e-05, average_loss:0.0417074440126174), time, (train_step_time: 0.37209s, train_average_time: 0.39639s);(grad_norm_mean: nan, grad_norm_step: 0.00501) +2023-10-25 19:59:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5840, lr:6.870608e-10, step_mean_loss:6.650447903666645e-05, average_loss:0.041568377194171134), time, (train_step_time: 0.37963s, train_average_time: 0.39638s);(grad_norm_mean: nan, grad_norm_step: 0.00992) +2023-10-25 19:59:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5860, lr:0.000000e+00, step_mean_loss:0.00021839721011929214, average_loss:0.04143629743532583), time, (train_step_time: 0.35505s, train_average_time: 0.39629s);(grad_norm_mean: nan, grad_norm_step: 0.02116) +2023-10-25 19:59:15 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 19, steps: 5860); +2023-10-25 19:59:18 INFO root donut_experiment.py:62 - batch inference time:2.4242782592773438 s +2023-10-25 19:59:20 INFO root donut_experiment.py:62 - batch inference time:1.9451062679290771 s +2023-10-25 19:59:21 INFO root donut_experiment.py:62 - batch inference time:1.1149086952209473 s +2023-10-25 19:59:21 INFO root donut_experiment.py:62 - batch inference time:0.6961526870727539 s +2023-10-25 19:59:23 INFO root donut_experiment.py:62 - batch inference time:1.5639188289642334 s +2023-10-25 19:59:24 INFO root donut_experiment.py:62 - batch inference time:0.8161253929138184 s +2023-10-25 19:59:24 INFO root donut_experiment.py:62 - batch inference time:0.5621988773345947 s +2023-10-25 19:59:25 INFO root donut_experiment.py:62 - batch inference time:1.0174446105957031 s +2023-10-25 19:59:26 INFO root donut_experiment.py:62 - batch inference time:1.064770221710205 s +2023-10-25 19:59:28 INFO root donut_experiment.py:62 - batch inference time:1.612372875213623 s +2023-10-25 19:59:30 INFO root donut_experiment.py:62 - batch inference time:1.8016202449798584 s +2023-10-25 19:59:30 INFO root donut_experiment.py:62 - batch inference time:0.5043320655822754 s +2023-10-25 19:59:32 INFO root donut_experiment.py:62 - batch inference time:1.4801969528198242 s +2023-10-25 19:59:33 INFO root donut_experiment.py:62 - batch inference time:1.0798912048339844 s +2023-10-25 19:59:34 INFO root donut_experiment.py:62 - batch inference time:1.4234836101531982 s +2023-10-25 19:59:35 INFO root donut_experiment.py:62 - batch inference time:1.070096492767334 s +2023-10-25 19:59:37 INFO root donut_experiment.py:62 - batch inference time:1.3559293746948242 s +2023-10-25 19:59:38 INFO root donut_experiment.py:62 - batch inference time:1.246992588043213 s +2023-10-25 19:59:38 INFO root donut_experiment.py:71 - evaluating... +2023-10-25 19:59:38 INFO root donut_experiment.py:72 - token_acc: 0.33974557865342847; edit_dis: 0.11337283713515317 +2023-10-25 19:59:43 INFO root base_experiment.py:333 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/./nougat_latex/nougat-base_epoch19_step5860_lr0.000000e+00_avg_loss0.04144_token_acc0.33975_edit_dis0.11337.pth +2023-10-26 12:30:40 INFO root base_experiment.py:181 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-26 12:31:03 INFO root donut_experiment.py:145 - init weight from pretrained model:facebook/nougat-base +2023-10-26 12:31:06 INFO root donut_experiment.py:152 - Number of parameter: 348.69M +2023-10-26 12:31:07 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-26 12:31:07 INFO root donut_experiment.py:179 - success init train data loader len:293 +2023-10-26 12:31:07 INFO root donut_experiment.py:226 - use data loader with batch_size:2,num_workers:10 +2023-10-26 12:31:07 INFO root donut_experiment.py:192 - success init eval data loader len:18 +2023-10-26 12:31:07 INFO root base_experiment.py:293 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:5860, gradient_accumulator:1 +2023-10-26 12:31:07 INFO root base_experiment.py:224 - current trainer epochs:20, train_dataset_len:586, data_loader_len:293 +2023-10-26 12:31:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 20, lr:8.000000e-07, step_mean_loss:1.0553369522094727, average_loss:0.5562451958656311), time, (train_step_time: 0.53685s, train_average_time: 0.68287s);(grad_norm_mean: inf, grad_norm_step: 17.89600) +2023-10-26 12:31:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 40, lr:1.600000e-06, step_mean_loss:0.48397156596183777, average_loss:0.4979372933506966), time, (train_step_time: 0.54830s, train_average_time: 0.62004s);(grad_norm_mean: inf, grad_norm_step: 4.47663) +2023-10-26 12:31:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 60, lr:2.400000e-06, step_mean_loss:0.10057521611452103, average_loss:0.48527882260580857), time, (train_step_time: 0.59045s, train_average_time: 0.60343s);(grad_norm_mean: inf, grad_norm_step: 2.02597) +2023-10-26 12:31:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 80, lr:3.200000e-06, step_mean_loss:0.184544175863266, average_loss:0.47076297691091895), time, (train_step_time: 0.59847s, train_average_time: 0.59808s);(grad_norm_mean: inf, grad_norm_step: 2.08308) +2023-10-26 12:32:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 100, lr:4.000000e-06, step_mean_loss:0.1148744747042656, average_loss:0.42597986370325086), time, (train_step_time: 0.54882s, train_average_time: 0.59316s);(grad_norm_mean: inf, grad_norm_step: 2.01234) +2023-10-26 12:32:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 120, lr:4.800000e-06, step_mean_loss:0.21608218550682068, average_loss:0.3915893180916707), time, (train_step_time: 0.54416s, train_average_time: 0.59049s);(grad_norm_mean: inf, grad_norm_step: 6.26899) +2023-10-26 12:32:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 140, lr:5.600000e-06, step_mean_loss:0.12520383298397064, average_loss:0.3631696643041713), time, (train_step_time: 0.57936s, train_average_time: 0.58794s);(grad_norm_mean: inf, grad_norm_step: 2.08688) +2023-10-26 12:32:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 160, lr:6.400000e-06, step_mean_loss:0.11729665100574493, average_loss:0.3536484681535512), time, (train_step_time: 0.54208s, train_average_time: 0.58473s);(grad_norm_mean: inf, grad_norm_step: 2.66429) +2023-10-26 12:32:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 180, lr:7.200000e-06, step_mean_loss:0.08415068686008453, average_loss:0.3374446161091328), time, (train_step_time: 0.54941s, train_average_time: 0.58254s);(grad_norm_mean: inf, grad_norm_step: 2.32221) +2023-10-26 12:33:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 200, lr:8.000000e-06, step_mean_loss:0.13081657886505127, average_loss:0.32279277491383257), time, (train_step_time: 0.53759s, train_average_time: 0.58010s);(grad_norm_mean: inf, grad_norm_step: 3.87322) +2023-10-26 12:33:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 220, lr:8.800000e-06, step_mean_loss:0.24717874825000763, average_loss:0.31431662576611746), time, (train_step_time: 0.53306s, train_average_time: 0.57762s);(grad_norm_mean: inf, grad_norm_step: 6.55529) +2023-10-26 12:33:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 240, lr:9.600000e-06, step_mean_loss:0.11096413433551788, average_loss:0.30117929162612805), time, (train_step_time: 0.58058s, train_average_time: 0.57594s);(grad_norm_mean: nan, grad_norm_step: 2.11402) +2023-10-26 12:33:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 260, lr:1.040000e-05, step_mean_loss:0.12048478424549103, average_loss:0.29398925616047705), time, (train_step_time: 0.53514s, train_average_time: 0.57464s);(grad_norm_mean: nan, grad_norm_step: 3.65818) +2023-10-26 12:33:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 0, steps: 280, lr:1.120000e-05, step_mean_loss:0.22567220032215118, average_loss:0.28764251269666213), time, (train_step_time: 0.52405s, train_average_time: 0.57388s);(grad_norm_mean: nan, grad_norm_step: 7.65523) +2023-10-26 12:33:56 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 0, steps: 293); +2023-10-26 12:34:01 INFO root donut_experiment.py:62 - batch inference time:3.6980807781219482 s +2023-10-26 12:34:03 INFO root donut_experiment.py:62 - batch inference time:2.3658535480499268 s +2023-10-26 12:34:13 INFO root donut_experiment.py:62 - batch inference time:10.101116180419922 s +2023-10-26 12:34:24 INFO root donut_experiment.py:62 - batch inference time:10.680176019668579 s +2023-10-26 12:34:26 INFO root donut_experiment.py:62 - batch inference time:1.8605024814605713 s +2023-10-26 12:34:27 INFO root donut_experiment.py:62 - batch inference time:1.106893539428711 s +2023-10-26 12:34:28 INFO root donut_experiment.py:62 - batch inference time:0.8492166996002197 s +2023-10-26 12:34:29 INFO root donut_experiment.py:62 - batch inference time:1.4355571269989014 s +2023-10-26 12:34:31 INFO root donut_experiment.py:62 - batch inference time:1.4089958667755127 s +2023-10-26 12:34:33 INFO root donut_experiment.py:62 - batch inference time:2.0936880111694336 s +2023-10-26 12:34:35 INFO root donut_experiment.py:62 - batch inference time:1.8255829811096191 s +2023-10-26 12:34:36 INFO root donut_experiment.py:62 - batch inference time:0.7692561149597168 s +2023-10-26 12:34:38 INFO root donut_experiment.py:62 - batch inference time:1.9043476581573486 s +2023-10-26 12:34:39 INFO root donut_experiment.py:62 - batch inference time:1.4103574752807617 s +2023-10-26 12:34:41 INFO root donut_experiment.py:62 - batch inference time:2.0342376232147217 s +2023-10-26 12:34:43 INFO root donut_experiment.py:62 - batch inference time:1.4842338562011719 s +2023-10-26 12:34:53 INFO root donut_experiment.py:62 - batch inference time:10.621803283691406 s +2023-10-26 12:34:55 INFO root donut_experiment.py:62 - batch inference time:1.556122064590454 s +2023-10-26 12:34:55 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:34:55 INFO root donut_experiment.py:72 - token_acc: 0.19435154217762912; edit_dis: 1.4502020730787244 +2023-10-26 12:35:00 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch0_step293_lr1.172000e-05_avg_loss0.28025_token_acc0.19435_edit_dis1.45020.pth +2023-10-26 12:35:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 300, lr:1.200000e-05, step_mean_loss:0.17456145584583282, average_loss:0.27704182617366313), time, (train_step_time: 0.57983s, train_average_time: 0.57367s);(grad_norm_mean: nan, grad_norm_step: 4.87766) +2023-10-26 12:35:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 320, lr:1.280000e-05, step_mean_loss:0.06194496154785156, average_loss:0.2663383764855098), time, (train_step_time: 0.54874s, train_average_time: 0.57398s);(grad_norm_mean: nan, grad_norm_step: 1.64711) +2023-10-26 12:35:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 340, lr:1.360000e-05, step_mean_loss:0.5965428948402405, average_loss:0.26085712998995886), time, (train_step_time: 0.56588s, train_average_time: 0.57247s);(grad_norm_mean: nan, grad_norm_step: 11.62092) +2023-10-26 12:35:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 360, lr:1.440000e-05, step_mean_loss:0.29057687520980835, average_loss:0.25492964916241667), time, (train_step_time: 0.52177s, train_average_time: 0.57211s);(grad_norm_mean: nan, grad_norm_step: 5.83028) +2023-10-26 12:35:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 380, lr:1.520000e-05, step_mean_loss:0.056654565036296844, average_loss:0.2468835735791608), time, (train_step_time: 0.54888s, train_average_time: 0.57150s);(grad_norm_mean: nan, grad_norm_step: 0.99971) +2023-10-26 12:36:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 400, lr:1.600000e-05, step_mean_loss:0.12393049150705338, average_loss:0.24073192005511374), time, (train_step_time: 0.55326s, train_average_time: 0.57063s);(grad_norm_mean: nan, grad_norm_step: 9.97880) +2023-10-26 12:36:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 420, lr:1.680000e-05, step_mean_loss:0.13472123444080353, average_loss:0.23482456291094422), time, (train_step_time: 0.54888s, train_average_time: 0.56976s);(grad_norm_mean: nan, grad_norm_step: 2.21776) +2023-10-26 12:36:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 440, lr:1.760000e-05, step_mean_loss:0.10051782429218292, average_loss:0.2302930051206865), time, (train_step_time: 0.57226s, train_average_time: 0.56938s);(grad_norm_mean: nan, grad_norm_step: 3.32623) +2023-10-26 12:36:35 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 460, lr:1.840000e-05, step_mean_loss:0.07919333130121231, average_loss:0.22589181545595435), time, (train_step_time: 0.57347s, train_average_time: 0.56912s);(grad_norm_mean: nan, grad_norm_step: 2.09650) +2023-10-26 12:36:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 480, lr:1.920000e-05, step_mean_loss:0.1299516260623932, average_loss:0.22433540658676066), time, (train_step_time: 0.67869s, train_average_time: 0.56893s);(grad_norm_mean: nan, grad_norm_step: 7.44323) +2023-10-26 12:36:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 500, lr:2.000000e-05, step_mean_loss:0.2398732602596283, average_loss:0.22248748815618455), time, (train_step_time: 0.55762s, train_average_time: 0.56847s);(grad_norm_mean: nan, grad_norm_step: 7.77120) +2023-10-26 12:37:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 520, lr:1.999931e-05, step_mean_loss:0.07361119985580444, average_loss:0.21763492992255262), time, (train_step_time: 0.61404s, train_average_time: 0.56815s);(grad_norm_mean: nan, grad_norm_step: 2.74971) +2023-10-26 12:37:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 540, lr:1.999725e-05, step_mean_loss:0.04339558258652687, average_loss:0.21477058643164734), time, (train_step_time: 0.55420s, train_average_time: 0.56791s);(grad_norm_mean: nan, grad_norm_step: 1.98547) +2023-10-26 12:37:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 560, lr:1.999382e-05, step_mean_loss:0.2573273181915283, average_loss:0.2124667587290917), time, (train_step_time: 0.53584s, train_average_time: 0.56773s);(grad_norm_mean: nan, grad_norm_step: 3.16152) +2023-10-26 12:37:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 1, steps: 580, lr:1.998901e-05, step_mean_loss:0.08279241621494293, average_loss:0.20920990329910197), time, (train_step_time: 0.52908s, train_average_time: 0.56748s);(grad_norm_mean: nan, grad_norm_step: 2.22144) +2023-10-26 12:37:46 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 1, steps: 586); +2023-10-26 12:37:50 INFO root donut_experiment.py:62 - batch inference time:3.211170196533203 s +2023-10-26 12:37:53 INFO root donut_experiment.py:62 - batch inference time:2.5270745754241943 s +2023-10-26 12:37:54 INFO root donut_experiment.py:62 - batch inference time:1.4992682933807373 s +2023-10-26 12:37:56 INFO root donut_experiment.py:62 - batch inference time:1.0330016613006592 s +2023-10-26 12:37:58 INFO root donut_experiment.py:62 - batch inference time:2.022871732711792 s +2023-10-26 12:37:59 INFO root donut_experiment.py:62 - batch inference time:1.2202680110931396 s +2023-10-26 12:38:00 INFO root donut_experiment.py:62 - batch inference time:0.8074262142181396 s +2023-10-26 12:38:01 INFO root donut_experiment.py:62 - batch inference time:1.3235492706298828 s +2023-10-26 12:38:02 INFO root donut_experiment.py:62 - batch inference time:1.3628389835357666 s +2023-10-26 12:38:04 INFO root donut_experiment.py:62 - batch inference time:1.9975810050964355 s +2023-10-26 12:38:07 INFO root donut_experiment.py:62 - batch inference time:2.1793265342712402 s +2023-10-26 12:38:07 INFO root donut_experiment.py:62 - batch inference time:0.7724909782409668 s +2023-10-26 12:38:09 INFO root donut_experiment.py:62 - batch inference time:2.0629193782806396 s +2023-10-26 12:38:11 INFO root donut_experiment.py:62 - batch inference time:1.4933602809906006 s +2023-10-26 12:38:13 INFO root donut_experiment.py:62 - batch inference time:2.041300058364868 s +2023-10-26 12:38:15 INFO root donut_experiment.py:62 - batch inference time:1.4689371585845947 s +2023-10-26 12:38:16 INFO root donut_experiment.py:62 - batch inference time:1.7103073596954346 s +2023-10-26 12:38:18 INFO root donut_experiment.py:62 - batch inference time:1.5947105884552002 s +2023-10-26 12:38:18 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:38:18 INFO root donut_experiment.py:72 - token_acc: 0.36576239476145933; edit_dis: 0.15213278058884389 +2023-10-26 12:38:24 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch1_step586_lr1.998730e-05_avg_loss0.20770_token_acc0.36576_edit_dis0.15213.pth +2023-10-26 12:38:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 600, lr:1.998283e-05, step_mean_loss:0.09122180193662643, average_loss:0.2060531788179651), time, (train_step_time: 0.53371s, train_average_time: 0.56783s);(grad_norm_mean: nan, grad_norm_step: 2.87828) +2023-10-26 12:38:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 620, lr:1.997528e-05, step_mean_loss:0.012412048876285553, average_loss:0.20219963196845306), time, (train_step_time: 0.59150s, train_average_time: 0.56778s);(grad_norm_mean: nan, grad_norm_step: 0.81780) +2023-10-26 12:38:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 640, lr:1.996635e-05, step_mean_loss:0.017424823716282845, average_loss:0.19786834904953138), time, (train_step_time: 0.53283s, train_average_time: 0.56772s);(grad_norm_mean: nan, grad_norm_step: 0.79175) +2023-10-26 12:39:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 660, lr:1.995606e-05, step_mean_loss:0.14716126024723053, average_loss:0.1965916664822902), time, (train_step_time: 0.53015s, train_average_time: 0.56729s);(grad_norm_mean: nan, grad_norm_step: 4.85196) +2023-10-26 12:39:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 680, lr:1.994440e-05, step_mean_loss:0.12624241411685944, average_loss:0.19401058234952334), time, (train_step_time: 0.56009s, train_average_time: 0.56727s);(grad_norm_mean: nan, grad_norm_step: 4.43651) +2023-10-26 12:39:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 700, lr:1.993137e-05, step_mean_loss:0.03348444402217865, average_loss:0.19075254634654681), time, (train_step_time: 0.54235s, train_average_time: 0.56711s);(grad_norm_mean: nan, grad_norm_step: 3.76999) +2023-10-26 12:39:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 720, lr:1.991698e-05, step_mean_loss:0.026829246431589127, average_loss:0.18807080228030423), time, (train_step_time: 0.53658s, train_average_time: 0.56681s);(grad_norm_mean: nan, grad_norm_step: 0.89472) +2023-10-26 12:39:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 740, lr:1.990123e-05, step_mean_loss:0.05960846319794655, average_loss:0.18429186951425683), time, (train_step_time: 0.56923s, train_average_time: 0.56699s);(grad_norm_mean: nan, grad_norm_step: 2.53182) +2023-10-26 12:40:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 760, lr:1.988411e-05, step_mean_loss:0.06013166531920433, average_loss:0.18333593686509533), time, (train_step_time: 0.55416s, train_average_time: 0.56707s);(grad_norm_mean: nan, grad_norm_step: 2.13191) +2023-10-26 12:40:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 780, lr:1.986564e-05, step_mean_loss:0.05286954715847969, average_loss:0.18066714965433886), time, (train_step_time: 0.59934s, train_average_time: 0.56690s);(grad_norm_mean: nan, grad_norm_step: 2.01156) +2023-10-26 12:40:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 800, lr:1.984581e-05, step_mean_loss:0.10073433816432953, average_loss:0.1786925066789263), time, (train_step_time: 0.57243s, train_average_time: 0.56671s);(grad_norm_mean: nan, grad_norm_step: 5.11844) +2023-10-26 12:40:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 820, lr:1.982463e-05, step_mean_loss:0.17155537009239197, average_loss:0.17606314824604452), time, (train_step_time: 0.54955s, train_average_time: 0.56612s);(grad_norm_mean: nan, grad_norm_step: 3.39318) +2023-10-26 12:40:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 840, lr:1.980209e-05, step_mean_loss:0.07400123029947281, average_loss:0.174185288836348), time, (train_step_time: 0.60023s, train_average_time: 0.56601s);(grad_norm_mean: nan, grad_norm_step: 1.13993) +2023-10-26 12:40:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 2, steps: 860, lr:1.977821e-05, step_mean_loss:0.05674242973327637, average_loss:0.1721688659279607), time, (train_step_time: 0.60630s, train_average_time: 0.56631s);(grad_norm_mean: nan, grad_norm_step: 2.44153) +2023-10-26 12:41:10 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 2, steps: 879); +2023-10-26 12:41:14 INFO root donut_experiment.py:62 - batch inference time:2.9615259170532227 s +2023-10-26 12:41:16 INFO root donut_experiment.py:62 - batch inference time:2.3083627223968506 s +2023-10-26 12:41:18 INFO root donut_experiment.py:62 - batch inference time:1.4087154865264893 s +2023-10-26 12:41:19 INFO root donut_experiment.py:62 - batch inference time:0.9584076404571533 s +2023-10-26 12:41:21 INFO root donut_experiment.py:62 - batch inference time:1.9698305130004883 s +2023-10-26 12:41:22 INFO root donut_experiment.py:62 - batch inference time:1.080702543258667 s +2023-10-26 12:41:23 INFO root donut_experiment.py:62 - batch inference time:0.8434798717498779 s +2023-10-26 12:41:24 INFO root donut_experiment.py:62 - batch inference time:1.332179069519043 s +2023-10-26 12:41:26 INFO root donut_experiment.py:62 - batch inference time:1.4195671081542969 s +2023-10-26 12:41:28 INFO root donut_experiment.py:62 - batch inference time:1.9581069946289062 s +2023-10-26 12:41:29 INFO root donut_experiment.py:62 - batch inference time:1.8196969032287598 s +2023-10-26 12:41:30 INFO root donut_experiment.py:62 - batch inference time:0.7534475326538086 s +2023-10-26 12:41:32 INFO root donut_experiment.py:62 - batch inference time:1.9289793968200684 s +2023-10-26 12:41:34 INFO root donut_experiment.py:62 - batch inference time:1.594283103942871 s +2023-10-26 12:41:36 INFO root donut_experiment.py:62 - batch inference time:2.051363229751587 s +2023-10-26 12:41:37 INFO root donut_experiment.py:62 - batch inference time:1.3211278915405273 s +2023-10-26 12:41:39 INFO root donut_experiment.py:62 - batch inference time:2.0229928493499756 s +2023-10-26 12:41:41 INFO root donut_experiment.py:62 - batch inference time:1.676405906677246 s +2023-10-26 12:41:41 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:41:41 INFO root donut_experiment.py:72 - token_acc: 0.41571969696969696; edit_dis: 0.14009444784762629 +2023-10-26 12:41:47 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch2_step879_lr1.975428e-05_avg_loss0.16994_token_acc0.41572_edit_dis0.14009.pth +2023-10-26 12:41:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 880, lr:1.975299e-05, step_mean_loss:0.07692039012908936, average_loss:0.16983554210402707), time, (train_step_time: 0.70031s, train_average_time: 0.56659s);(grad_norm_mean: nan, grad_norm_step: 2.23050) +2023-10-26 12:42:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 900, lr:1.972643e-05, step_mean_loss:0.023202883079648018, average_loss:0.16715985462871483), time, (train_step_time: 0.53705s, train_average_time: 0.56657s);(grad_norm_mean: nan, grad_norm_step: 1.76612) +2023-10-26 12:42:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 920, lr:1.969853e-05, step_mean_loss:0.020795265212655067, average_loss:0.16520794521303564), time, (train_step_time: 0.61799s, train_average_time: 0.56687s);(grad_norm_mean: nan, grad_norm_step: 0.91658) +2023-10-26 12:42:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 940, lr:1.966930e-05, step_mean_loss:0.11121229827404022, average_loss:0.16304845880528793), time, (train_step_time: 0.56162s, train_average_time: 0.56676s);(grad_norm_mean: nan, grad_norm_step: 4.37895) +2023-10-26 12:42:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 960, lr:1.963874e-05, step_mean_loss:0.023149993270635605, average_loss:0.16077326483549162), time, (train_step_time: 0.67108s, train_average_time: 0.56715s);(grad_norm_mean: nan, grad_norm_step: 4.23267) +2023-10-26 12:42:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 980, lr:1.960685e-05, step_mean_loss:0.016331350430846214, average_loss:0.15849139303282586), time, (train_step_time: 0.57173s, train_average_time: 0.56741s);(grad_norm_mean: nan, grad_norm_step: 1.83071) +2023-10-26 12:42:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1000, lr:1.957365e-05, step_mean_loss:0.05541873350739479, average_loss:0.15687730132741853), time, (train_step_time: 0.53934s, train_average_time: 0.56723s);(grad_norm_mean: nan, grad_norm_step: 1.85262) +2023-10-26 12:42:57 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 3, steps: 1000); +2023-10-26 12:43:01 INFO root donut_experiment.py:62 - batch inference time:2.957193374633789 s +2023-10-26 12:43:03 INFO root donut_experiment.py:62 - batch inference time:2.3611111640930176 s +2023-10-26 12:43:05 INFO root donut_experiment.py:62 - batch inference time:1.429222583770752 s +2023-10-26 12:43:06 INFO root donut_experiment.py:62 - batch inference time:0.9769048690795898 s +2023-10-26 12:43:08 INFO root donut_experiment.py:62 - batch inference time:1.9438958168029785 s +2023-10-26 12:43:09 INFO root donut_experiment.py:62 - batch inference time:1.1411683559417725 s +2023-10-26 12:43:10 INFO root donut_experiment.py:62 - batch inference time:0.9256877899169922 s +2023-10-26 12:43:11 INFO root donut_experiment.py:62 - batch inference time:1.3153419494628906 s +2023-10-26 12:43:13 INFO root donut_experiment.py:62 - batch inference time:1.4480669498443604 s +2023-10-26 12:43:15 INFO root donut_experiment.py:62 - batch inference time:2.2030837535858154 s +2023-10-26 12:43:17 INFO root donut_experiment.py:62 - batch inference time:2.175546646118164 s +2023-10-26 12:43:18 INFO root donut_experiment.py:62 - batch inference time:0.7785604000091553 s +2023-10-26 12:43:20 INFO root donut_experiment.py:62 - batch inference time:1.7817420959472656 s +2023-10-26 12:43:21 INFO root donut_experiment.py:62 - batch inference time:1.5608975887298584 s +2023-10-26 12:43:23 INFO root donut_experiment.py:62 - batch inference time:1.8320484161376953 s +2023-10-26 12:43:24 INFO root donut_experiment.py:62 - batch inference time:1.419853925704956 s +2023-10-26 12:43:26 INFO root donut_experiment.py:62 - batch inference time:1.7378830909729004 s +2023-10-26 12:43:28 INFO root donut_experiment.py:62 - batch inference time:1.572253704071045 s +2023-10-26 12:43:28 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:43:28 INFO root donut_experiment.py:72 - token_acc: 0.3357120695004654; edit_dis: 0.11693181240727243 +2023-10-26 12:43:33 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch3_step1000_lr1.957365e-05_avg_loss0.15688_token_acc0.33571_edit_dis0.11693.pth +2023-10-26 12:43:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1020, lr:1.953913e-05, step_mean_loss:0.04390299320220947, average_loss:0.15555711219534643), time, (train_step_time: 0.59387s, train_average_time: 0.56743s);(grad_norm_mean: nan, grad_norm_step: 1.59662) +2023-10-26 12:43:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1040, lr:1.950329e-05, step_mean_loss:0.061536356806755066, average_loss:0.1536318600544921), time, (train_step_time: 0.56085s, train_average_time: 0.56734s);(grad_norm_mean: nan, grad_norm_step: 1.27224) +2023-10-26 12:44:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1060, lr:1.946616e-05, step_mean_loss:0.03808756545186043, average_loss:0.1526436223305832), time, (train_step_time: 0.61518s, train_average_time: 0.56719s);(grad_norm_mean: nan, grad_norm_step: 1.30674) +2023-10-26 12:44:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1080, lr:1.942772e-05, step_mean_loss:0.14551813900470734, average_loss:0.15106310704033132), time, (train_step_time: 0.54194s, train_average_time: 0.56725s);(grad_norm_mean: nan, grad_norm_step: 4.41166) +2023-10-26 12:44:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1100, lr:1.938798e-05, step_mean_loss:0.11392299830913544, average_loss:0.14940080733461814), time, (train_step_time: 0.52772s, train_average_time: 0.56767s);(grad_norm_mean: nan, grad_norm_step: 2.66051) +2023-10-26 12:44:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1120, lr:1.934696e-05, step_mean_loss:0.02256678231060505, average_loss:0.14769140508474915), time, (train_step_time: 0.58999s, train_average_time: 0.56750s);(grad_norm_mean: nan, grad_norm_step: 1.25541) +2023-10-26 12:44:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1140, lr:1.930465e-05, step_mean_loss:0.05565804988145828, average_loss:0.1458534831254694), time, (train_step_time: 0.62990s, train_average_time: 0.56774s);(grad_norm_mean: nan, grad_norm_step: 2.19217) +2023-10-26 12:45:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 3, steps: 1160, lr:1.926107e-05, step_mean_loss:0.06573444604873657, average_loss:0.14500384402846725), time, (train_step_time: 0.54994s, train_average_time: 0.56793s);(grad_norm_mean: nan, grad_norm_step: 4.01110) +2023-10-26 12:45:12 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 3, steps: 1172); +2023-10-26 12:45:16 INFO root donut_experiment.py:62 - batch inference time:2.877197504043579 s +2023-10-26 12:45:18 INFO root donut_experiment.py:62 - batch inference time:2.302034854888916 s +2023-10-26 12:45:20 INFO root donut_experiment.py:62 - batch inference time:1.7465159893035889 s +2023-10-26 12:45:21 INFO root donut_experiment.py:62 - batch inference time:0.9805412292480469 s +2023-10-26 12:45:23 INFO root donut_experiment.py:62 - batch inference time:1.8928248882293701 s +2023-10-26 12:45:24 INFO root donut_experiment.py:62 - batch inference time:1.072155475616455 s +2023-10-26 12:45:25 INFO root donut_experiment.py:62 - batch inference time:0.816169261932373 s +2023-10-26 12:45:26 INFO root donut_experiment.py:62 - batch inference time:1.3165442943572998 s +2023-10-26 12:45:27 INFO root donut_experiment.py:62 - batch inference time:1.4021646976470947 s +2023-10-26 12:45:38 INFO root donut_experiment.py:62 - batch inference time:10.282291889190674 s +2023-10-26 12:45:40 INFO root donut_experiment.py:62 - batch inference time:2.198173999786377 s +2023-10-26 12:45:41 INFO root donut_experiment.py:62 - batch inference time:0.7203667163848877 s +2023-10-26 12:45:43 INFO root donut_experiment.py:62 - batch inference time:1.8894569873809814 s +2023-10-26 12:45:44 INFO root donut_experiment.py:62 - batch inference time:1.517033576965332 s +2023-10-26 12:45:46 INFO root donut_experiment.py:62 - batch inference time:1.8525373935699463 s +2023-10-26 12:45:48 INFO root donut_experiment.py:62 - batch inference time:1.5345330238342285 s +2023-10-26 12:45:49 INFO root donut_experiment.py:62 - batch inference time:1.781435251235962 s +2023-10-26 12:45:51 INFO root donut_experiment.py:62 - batch inference time:1.607741117477417 s +2023-10-26 12:45:51 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:45:51 INFO root donut_experiment.py:72 - token_acc: 0.28057926040858544; edit_dis: 0.23405436004052044 +2023-10-26 12:45:57 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch3_step1172_lr1.923430e-05_avg_loss0.14416_token_acc0.28058_edit_dis0.23405.pth +2023-10-26 12:46:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1180, lr:1.921621e-05, step_mean_loss:0.0219029001891613, average_loss:0.14353527534847796), time, (train_step_time: 0.54924s, train_average_time: 0.56790s);(grad_norm_mean: nan, grad_norm_step: 0.90677) +2023-10-26 12:46:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1200, lr:1.917008e-05, step_mean_loss:0.02792307920753956, average_loss:0.14247774049950143), time, (train_step_time: 0.53282s, train_average_time: 0.56783s);(grad_norm_mean: nan, grad_norm_step: 3.77525) +2023-10-26 12:46:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1220, lr:1.912270e-05, step_mean_loss:0.14903870224952698, average_loss:0.1408808206181332), time, (train_step_time: 0.54863s, train_average_time: 0.56776s);(grad_norm_mean: nan, grad_norm_step: 4.50652) +2023-10-26 12:46:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1240, lr:1.907406e-05, step_mean_loss:0.016762882471084595, average_loss:0.13937565946928976), time, (train_step_time: 0.62663s, train_average_time: 0.56774s);(grad_norm_mean: nan, grad_norm_step: 1.01616) +2023-10-26 12:46:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1260, lr:1.902417e-05, step_mean_loss:0.042581360787153244, average_loss:0.13770587227107692), time, (train_step_time: 0.56848s, train_average_time: 0.56777s);(grad_norm_mean: nan, grad_norm_step: 2.50250) +2023-10-26 12:46:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1280, lr:1.897304e-05, step_mean_loss:0.17932462692260742, average_loss:0.13642522359828035), time, (train_step_time: 0.52943s, train_average_time: 0.56781s);(grad_norm_mean: nan, grad_norm_step: 4.34488) +2023-10-26 12:47:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1300, lr:1.892068e-05, step_mean_loss:0.1155647411942482, average_loss:0.13517377463191785), time, (train_step_time: 0.54566s, train_average_time: 0.56717s);(grad_norm_mean: nan, grad_norm_step: 5.43779) +2023-10-26 12:47:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1320, lr:1.886710e-05, step_mean_loss:0.03554210811853409, average_loss:0.13389506126656092), time, (train_step_time: 0.54168s, train_average_time: 0.56690s);(grad_norm_mean: nan, grad_norm_step: 1.75453) +2023-10-26 12:47:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1340, lr:1.881230e-05, step_mean_loss:0.05652482435107231, average_loss:0.13291016839841666), time, (train_step_time: 0.56082s, train_average_time: 0.56693s);(grad_norm_mean: nan, grad_norm_step: 1.34999) +2023-10-26 12:47:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1360, lr:1.875628e-05, step_mean_loss:0.14552101492881775, average_loss:0.13163808713524144), time, (train_step_time: 0.52468s, train_average_time: 0.56675s);(grad_norm_mean: nan, grad_norm_step: 4.71845) +2023-10-26 12:47:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1380, lr:1.869906e-05, step_mean_loss:0.1637205332517624, average_loss:0.13052226248255466), time, (train_step_time: 0.55853s, train_average_time: 0.56666s);(grad_norm_mean: nan, grad_norm_step: 4.35571) +2023-10-26 12:48:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1400, lr:1.864065e-05, step_mean_loss:0.04290198162198067, average_loss:0.12937430582662013), time, (train_step_time: 0.55066s, train_average_time: 0.56667s);(grad_norm_mean: nan, grad_norm_step: 4.54897) +2023-10-26 12:48:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1420, lr:1.858105e-05, step_mean_loss:0.15305393934249878, average_loss:0.12817434770055935), time, (train_step_time: 0.59036s, train_average_time: 0.56668s);(grad_norm_mean: nan, grad_norm_step: 4.73826) +2023-10-26 12:48:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1440, lr:1.852027e-05, step_mean_loss:0.00558793731033802, average_loss:0.1267690134263426), time, (train_step_time: 0.55431s, train_average_time: 0.56660s);(grad_norm_mean: nan, grad_norm_step: 0.39566) +2023-10-26 12:48:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 4, steps: 1460, lr:1.845832e-05, step_mean_loss:0.010465513914823532, average_loss:0.12561175335889957), time, (train_step_time: 0.53358s, train_average_time: 0.56642s);(grad_norm_mean: nan, grad_norm_step: 0.95619) +2023-10-26 12:48:42 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 4, steps: 1465); +2023-10-26 12:48:46 INFO root donut_experiment.py:62 - batch inference time:2.844597816467285 s +2023-10-26 12:48:56 INFO root donut_experiment.py:62 - batch inference time:10.453457832336426 s +2023-10-26 12:48:57 INFO root donut_experiment.py:62 - batch inference time:1.3967204093933105 s +2023-10-26 12:48:59 INFO root donut_experiment.py:62 - batch inference time:1.0181818008422852 s +2023-10-26 12:49:00 INFO root donut_experiment.py:62 - batch inference time:1.7081561088562012 s +2023-10-26 12:49:01 INFO root donut_experiment.py:62 - batch inference time:1.077094554901123 s +2023-10-26 12:49:02 INFO root donut_experiment.py:62 - batch inference time:0.9406242370605469 s +2023-10-26 12:49:04 INFO root donut_experiment.py:62 - batch inference time:1.3115289211273193 s +2023-10-26 12:49:05 INFO root donut_experiment.py:62 - batch inference time:1.393583059310913 s +2023-10-26 12:49:07 INFO root donut_experiment.py:62 - batch inference time:1.9479663372039795 s +2023-10-26 12:49:17 INFO root donut_experiment.py:62 - batch inference time:10.32798171043396 s +2023-10-26 12:49:18 INFO root donut_experiment.py:62 - batch inference time:0.7329902648925781 s +2023-10-26 12:49:20 INFO root donut_experiment.py:62 - batch inference time:1.826521396636963 s +2023-10-26 12:49:22 INFO root donut_experiment.py:62 - batch inference time:1.5452895164489746 s +2023-10-26 12:49:23 INFO root donut_experiment.py:62 - batch inference time:1.8609113693237305 s +2023-10-26 12:49:25 INFO root donut_experiment.py:62 - batch inference time:1.44515061378479 s +2023-10-26 12:49:27 INFO root donut_experiment.py:62 - batch inference time:1.6923539638519287 s +2023-10-26 12:49:28 INFO root donut_experiment.py:62 - batch inference time:1.643345832824707 s +2023-10-26 12:49:28 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:49:28 INFO root donut_experiment.py:72 - token_acc: 0.2606809802645847; edit_dis: 0.6796364369882922 +2023-10-26 12:49:34 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch4_step1465_lr1.844265e-05_avg_loss0.12571_token_acc0.26068_edit_dis0.67964.pth +2023-10-26 12:49:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1480, lr:1.839521e-05, step_mean_loss:0.0781347006559372, average_loss:0.12503232636250639), time, (train_step_time: 0.53726s, train_average_time: 0.56633s);(grad_norm_mean: nan, grad_norm_step: 7.35105) +2023-10-26 12:49:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1500, lr:1.833094e-05, step_mean_loss:0.021833881735801697, average_loss:0.12380706984134546), time, (train_step_time: 0.61797s, train_average_time: 0.56638s);(grad_norm_mean: nan, grad_norm_step: 0.78959) +2023-10-26 12:50:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1520, lr:1.826553e-05, step_mean_loss:0.004974003415554762, average_loss:0.12283843833356514), time, (train_step_time: 0.55505s, train_average_time: 0.56670s);(grad_norm_mean: nan, grad_norm_step: 0.45962) +2023-10-26 12:50:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1540, lr:1.819898e-05, step_mean_loss:0.10197903960943222, average_loss:0.12163159181394119), time, (train_step_time: 0.56199s, train_average_time: 0.56692s);(grad_norm_mean: nan, grad_norm_step: 7.28892) +2023-10-26 12:50:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1560, lr:1.813131e-05, step_mean_loss:0.025416681542992592, average_loss:0.12035775874026466), time, (train_step_time: 0.56800s, train_average_time: 0.56687s);(grad_norm_mean: nan, grad_norm_step: 0.70517) +2023-10-26 12:50:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1580, lr:1.806252e-05, step_mean_loss:0.004963915329426527, average_loss:0.1193779285167509), time, (train_step_time: 0.55358s, train_average_time: 0.56698s);(grad_norm_mean: nan, grad_norm_step: 0.24962) +2023-10-26 12:50:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1600, lr:1.799262e-05, step_mean_loss:0.07619065791368484, average_loss:0.11850256576479296), time, (train_step_time: 0.54514s, train_average_time: 0.56697s);(grad_norm_mean: nan, grad_norm_step: 3.79351) +2023-10-26 12:51:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1620, lr:1.792162e-05, step_mean_loss:0.017819028347730637, average_loss:0.11779423482424327), time, (train_step_time: 0.58367s, train_average_time: 0.56696s);(grad_norm_mean: nan, grad_norm_step: 2.96500) +2023-10-26 12:51:15 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1640, lr:1.784954e-05, step_mean_loss:0.022743161767721176, average_loss:0.11676843097361043), time, (train_step_time: 0.59863s, train_average_time: 0.56702s);(grad_norm_mean: nan, grad_norm_step: 1.34373) +2023-10-26 12:51:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1660, lr:1.777638e-05, step_mean_loss:0.05097317323088646, average_loss:0.11576584080088301), time, (train_step_time: 0.60271s, train_average_time: 0.56713s);(grad_norm_mean: nan, grad_norm_step: 4.74104) +2023-10-26 12:51:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1680, lr:1.770214e-05, step_mean_loss:0.013496343046426773, average_loss:0.11480291460299798), time, (train_step_time: 0.57217s, train_average_time: 0.56731s);(grad_norm_mean: nan, grad_norm_step: 1.63648) +2023-10-26 12:51:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1700, lr:1.762685e-05, step_mean_loss:0.006547161843627691, average_loss:0.11393091293346301), time, (train_step_time: 0.53627s, train_average_time: 0.56724s);(grad_norm_mean: nan, grad_norm_step: 0.36002) +2023-10-26 12:52:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1720, lr:1.755051e-05, step_mean_loss:0.026318063959479332, average_loss:0.11316503155482445), time, (train_step_time: 0.61808s, train_average_time: 0.56752s);(grad_norm_mean: nan, grad_norm_step: 1.31850) +2023-10-26 12:52:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 5, steps: 1740, lr:1.747314e-05, step_mean_loss:0.01657266728579998, average_loss:0.11222432516247634), time, (train_step_time: 0.53370s, train_average_time: 0.56735s);(grad_norm_mean: nan, grad_norm_step: 1.66202) +2023-10-26 12:52:23 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 5, steps: 1758); +2023-10-26 12:52:27 INFO root donut_experiment.py:62 - batch inference time:3.3723526000976562 s +2023-10-26 12:52:29 INFO root donut_experiment.py:62 - batch inference time:2.5575015544891357 s +2023-10-26 12:52:31 INFO root donut_experiment.py:62 - batch inference time:1.4448869228363037 s +2023-10-26 12:52:32 INFO root donut_experiment.py:62 - batch inference time:0.999701738357544 s +2023-10-26 12:52:34 INFO root donut_experiment.py:62 - batch inference time:1.9852938652038574 s +2023-10-26 12:52:35 INFO root donut_experiment.py:62 - batch inference time:1.1032218933105469 s +2023-10-26 12:52:36 INFO root donut_experiment.py:62 - batch inference time:0.8231048583984375 s +2023-10-26 12:52:37 INFO root donut_experiment.py:62 - batch inference time:1.3357667922973633 s +2023-10-26 12:52:39 INFO root donut_experiment.py:62 - batch inference time:1.3918499946594238 s +2023-10-26 12:52:41 INFO root donut_experiment.py:62 - batch inference time:1.9841067790985107 s +2023-10-26 12:52:42 INFO root donut_experiment.py:62 - batch inference time:1.9097657203674316 s +2023-10-26 12:52:43 INFO root donut_experiment.py:62 - batch inference time:0.8088862895965576 s +2023-10-26 12:52:45 INFO root donut_experiment.py:62 - batch inference time:2.06327223777771 s +2023-10-26 12:52:47 INFO root donut_experiment.py:62 - batch inference time:1.4993772506713867 s +2023-10-26 12:52:49 INFO root donut_experiment.py:62 - batch inference time:1.8946208953857422 s +2023-10-26 12:52:50 INFO root donut_experiment.py:62 - batch inference time:1.4411890506744385 s +2023-10-26 12:52:52 INFO root donut_experiment.py:62 - batch inference time:1.872509479522705 s +2023-10-26 12:52:54 INFO root donut_experiment.py:62 - batch inference time:1.5696008205413818 s +2023-10-26 12:52:54 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:52:54 INFO root donut_experiment.py:72 - token_acc: 0.3658310120705664; edit_dis: 0.11826065179091524 +2023-10-26 12:53:00 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch5_step1758_lr1.740262e-05_avg_loss0.11148_token_acc0.36583_edit_dis0.11826.pth +2023-10-26 12:53:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1760, lr:1.739473e-05, step_mean_loss:0.04191207140684128, average_loss:0.1114004096415804), time, (train_step_time: 0.61913s, train_average_time: 0.56725s);(grad_norm_mean: nan, grad_norm_step: 2.15373) +2023-10-26 12:53:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1780, lr:1.731531e-05, step_mean_loss:0.001049968646839261, average_loss:0.11062931168155374), time, (train_step_time: 0.55146s, train_average_time: 0.56733s);(grad_norm_mean: nan, grad_norm_step: 0.04887) +2023-10-26 12:53:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1800, lr:1.723489e-05, step_mean_loss:0.10949797928333282, average_loss:0.10973096480470203), time, (train_step_time: 0.55381s, train_average_time: 0.56737s);(grad_norm_mean: nan, grad_norm_step: 8.24943) +2023-10-26 12:53:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1820, lr:1.715347e-05, step_mean_loss:0.00954512134194374, average_loss:0.10890106807308077), time, (train_step_time: 0.52745s, train_average_time: 0.56720s);(grad_norm_mean: nan, grad_norm_step: 0.96835) +2023-10-26 12:53:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1840, lr:1.707107e-05, step_mean_loss:0.03563506901264191, average_loss:0.10801127806385605), time, (train_step_time: 0.55505s, train_average_time: 0.56717s);(grad_norm_mean: nan, grad_norm_step: 5.29041) +2023-10-26 12:53:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1860, lr:1.698769e-05, step_mean_loss:0.00196655560284853, average_loss:0.10722069537926776), time, (train_step_time: 0.53057s, train_average_time: 0.56704s);(grad_norm_mean: nan, grad_norm_step: 0.13981) +2023-10-26 12:54:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1880, lr:1.690336e-05, step_mean_loss:0.08045224845409393, average_loss:0.1064420159383828), time, (train_step_time: 0.54183s, train_average_time: 0.56689s);(grad_norm_mean: nan, grad_norm_step: 5.41067) +2023-10-26 12:54:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1900, lr:1.681808e-05, step_mean_loss:0.020124850794672966, average_loss:0.10559178825737418), time, (train_step_time: 0.62245s, train_average_time: 0.56710s);(grad_norm_mean: nan, grad_norm_step: 1.00835) +2023-10-26 12:54:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1920, lr:1.673186e-05, step_mean_loss:0.04972037672996521, average_loss:0.10493351879352607), time, (train_step_time: 0.52750s, train_average_time: 0.56673s);(grad_norm_mean: nan, grad_norm_step: 4.09143) +2023-10-26 12:54:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1940, lr:1.664471e-05, step_mean_loss:0.009351451881229877, average_loss:0.1042327988089888), time, (train_step_time: 0.54428s, train_average_time: 0.56662s);(grad_norm_mean: nan, grad_norm_step: 0.77739) +2023-10-26 12:54:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1960, lr:1.655666e-05, step_mean_loss:0.0034759757108986378, average_loss:0.10350295099636483), time, (train_step_time: 0.55729s, train_average_time: 0.56664s);(grad_norm_mean: nan, grad_norm_step: 0.36239) +2023-10-26 12:55:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 1980, lr:1.646770e-05, step_mean_loss:0.025917738676071167, average_loss:0.10269443194435984), time, (train_step_time: 0.54424s, train_average_time: 0.56655s);(grad_norm_mean: nan, grad_norm_step: 1.54415) +2023-10-26 12:55:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2000, lr:1.637785e-05, step_mean_loss:0.03563700616359711, average_loss:0.10207415637985105), time, (train_step_time: 0.58308s, train_average_time: 0.56665s);(grad_norm_mean: nan, grad_norm_step: 3.16052) +2023-10-26 12:55:17 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 6, steps: 2000); +2023-10-26 12:55:21 INFO root donut_experiment.py:62 - batch inference time:2.510587453842163 s +2023-10-26 12:55:23 INFO root donut_experiment.py:62 - batch inference time:2.3631417751312256 s +2023-10-26 12:55:24 INFO root donut_experiment.py:62 - batch inference time:1.4020228385925293 s +2023-10-26 12:55:25 INFO root donut_experiment.py:62 - batch inference time:0.980586051940918 s +2023-10-26 12:55:27 INFO root donut_experiment.py:62 - batch inference time:1.8941032886505127 s +2023-10-26 12:55:29 INFO root donut_experiment.py:62 - batch inference time:1.4276056289672852 s +2023-10-26 12:55:30 INFO root donut_experiment.py:62 - batch inference time:0.8062100410461426 s +2023-10-26 12:55:31 INFO root donut_experiment.py:62 - batch inference time:1.290090560913086 s +2023-10-26 12:55:42 INFO root donut_experiment.py:62 - batch inference time:10.585290670394897 s +2023-10-26 12:55:45 INFO root donut_experiment.py:62 - batch inference time:2.905810594558716 s +2023-10-26 12:55:47 INFO root donut_experiment.py:62 - batch inference time:2.312265396118164 s +2023-10-26 12:55:48 INFO root donut_experiment.py:62 - batch inference time:0.8012681007385254 s +2023-10-26 12:55:50 INFO root donut_experiment.py:62 - batch inference time:1.80079984664917 s +2023-10-26 12:55:51 INFO root donut_experiment.py:62 - batch inference time:1.641875982284546 s +2023-10-26 12:55:53 INFO root donut_experiment.py:62 - batch inference time:1.795769453048706 s +2023-10-26 12:56:03 INFO root donut_experiment.py:62 - batch inference time:9.923235893249512 s +2023-10-26 12:56:05 INFO root donut_experiment.py:62 - batch inference time:1.5370216369628906 s +2023-10-26 12:56:06 INFO root donut_experiment.py:62 - batch inference time:1.5507841110229492 s +2023-10-26 12:56:06 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:56:06 INFO root donut_experiment.py:72 - token_acc: 0.19808572617561382; edit_dis: 0.821035521416872 +2023-10-26 12:56:12 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch6_step2000_lr1.637785e-05_avg_loss0.10207_token_acc0.19809_edit_dis0.82104.pth +2023-10-26 12:56:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2020, lr:1.628713e-05, step_mean_loss:0.0008855239721015096, average_loss:0.10133401389290823), time, (train_step_time: 0.56195s, train_average_time: 0.56679s);(grad_norm_mean: nan, grad_norm_step: 0.26577) +2023-10-26 12:56:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 6, steps: 2040, lr:1.619554e-05, step_mean_loss:0.023244686424732208, average_loss:0.10070610736977816), time, (train_step_time: 0.58188s, train_average_time: 0.56669s);(grad_norm_mean: nan, grad_norm_step: 2.54589) +2023-10-26 12:56:41 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 6, steps: 2051); +2023-10-26 12:56:45 INFO root donut_experiment.py:62 - batch inference time:3.1446962356567383 s +2023-10-26 12:56:48 INFO root donut_experiment.py:62 - batch inference time:2.385782480239868 s +2023-10-26 12:56:49 INFO root donut_experiment.py:62 - batch inference time:1.4292035102844238 s +2023-10-26 12:56:50 INFO root donut_experiment.py:62 - batch inference time:1.0370874404907227 s +2023-10-26 12:56:52 INFO root donut_experiment.py:62 - batch inference time:1.9444587230682373 s +2023-10-26 12:56:53 INFO root donut_experiment.py:62 - batch inference time:1.1468069553375244 s +2023-10-26 12:57:04 INFO root donut_experiment.py:62 - batch inference time:10.413436651229858 s +2023-10-26 12:57:05 INFO root donut_experiment.py:62 - batch inference time:1.4183692932128906 s +2023-10-26 12:57:07 INFO root donut_experiment.py:62 - batch inference time:1.3837623596191406 s +2023-10-26 12:57:09 INFO root donut_experiment.py:62 - batch inference time:2.0085608959198 s +2023-10-26 12:57:11 INFO root donut_experiment.py:62 - batch inference time:2.033656597137451 s +2023-10-26 12:57:11 INFO root donut_experiment.py:62 - batch inference time:0.7330291271209717 s +2023-10-26 12:57:13 INFO root donut_experiment.py:62 - batch inference time:1.8588643074035645 s +2023-10-26 12:57:15 INFO root donut_experiment.py:62 - batch inference time:1.6268720626831055 s +2023-10-26 12:57:25 INFO root donut_experiment.py:62 - batch inference time:10.29293966293335 s +2023-10-26 12:57:27 INFO root donut_experiment.py:62 - batch inference time:1.6074228286743164 s +2023-10-26 12:57:29 INFO root donut_experiment.py:62 - batch inference time:1.8148508071899414 s +2023-10-26 12:57:30 INFO root donut_experiment.py:62 - batch inference time:1.6021370887756348 s +2023-10-26 12:57:31 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 12:57:31 INFO root donut_experiment.py:72 - token_acc: 0.2156439066551426; edit_dis: 0.5143457550303893 +2023-10-26 12:57:36 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch6_step2051_lr1.614480e-05_avg_loss0.10036_token_acc0.21564_edit_dis0.51435.pth +2023-10-26 12:57:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2060, lr:1.610310e-05, step_mean_loss:0.02104516513645649, average_loss:0.09999371639029597), time, (train_step_time: 0.63323s, train_average_time: 0.56712s);(grad_norm_mean: nan, grad_norm_step: 1.64436) +2023-10-26 12:57:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2080, lr:1.600983e-05, step_mean_loss:0.038111671805381775, average_loss:0.09941861977710617), time, (train_step_time: 0.59399s, train_average_time: 0.56729s);(grad_norm_mean: nan, grad_norm_step: 2.25046) +2023-10-26 12:58:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2100, lr:1.591572e-05, step_mean_loss:0.023509487509727478, average_loss:0.09881681181907576), time, (train_step_time: 0.59615s, train_average_time: 0.56738s);(grad_norm_mean: nan, grad_norm_step: 1.65248) +2023-10-26 12:58:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2120, lr:1.582081e-05, step_mean_loss:0.10340971499681473, average_loss:0.09808448857963052), time, (train_step_time: 0.54603s, train_average_time: 0.56734s);(grad_norm_mean: nan, grad_norm_step: 2.67859) +2023-10-26 12:58:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2140, lr:1.572509e-05, step_mean_loss:0.008153794333338737, average_loss:0.09739126194140353), time, (train_step_time: 0.53445s, train_average_time: 0.56726s);(grad_norm_mean: nan, grad_norm_step: 0.53184) +2023-10-26 12:58:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2160, lr:1.562859e-05, step_mean_loss:0.004370452370494604, average_loss:0.09678201377409332), time, (train_step_time: 0.54564s, train_average_time: 0.56713s);(grad_norm_mean: nan, grad_norm_step: 0.31474) +2023-10-26 12:58:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2180, lr:1.553131e-05, step_mean_loss:0.01944189891219139, average_loss:0.09606780843781745), time, (train_step_time: 0.58177s, train_average_time: 0.56721s);(grad_norm_mean: nan, grad_norm_step: 3.41362) +2023-10-26 12:59:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2200, lr:1.543328e-05, step_mean_loss:0.012889965437352657, average_loss:0.09554037002345218), time, (train_step_time: 0.58890s, train_average_time: 0.56717s);(grad_norm_mean: nan, grad_norm_step: 0.99754) +2023-10-26 12:59:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2220, lr:1.533449e-05, step_mean_loss:0.003213926451280713, average_loss:0.09480611513299828), time, (train_step_time: 0.62034s, train_average_time: 0.56730s);(grad_norm_mean: nan, grad_norm_step: 0.30350) +2023-10-26 12:59:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2240, lr:1.523498e-05, step_mean_loss:0.008790998719632626, average_loss:0.09433924912400471), time, (train_step_time: 0.63038s, train_average_time: 0.56749s);(grad_norm_mean: nan, grad_norm_step: 2.91458) +2023-10-26 12:59:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2260, lr:1.513474e-05, step_mean_loss:0.011492684483528137, average_loss:0.09371665757458783), time, (train_step_time: 0.59261s, train_average_time: 0.56745s);(grad_norm_mean: nan, grad_norm_step: 0.76013) +2023-10-26 12:59:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2280, lr:1.503380e-05, step_mean_loss:0.012945182621479034, average_loss:0.09314505289123126), time, (train_step_time: 0.61359s, train_average_time: 0.56740s);(grad_norm_mean: nan, grad_norm_step: 0.67864) +2023-10-26 13:00:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2300, lr:1.493217e-05, step_mean_loss:0.030238332226872444, average_loss:0.09249859384211721), time, (train_step_time: 0.54450s, train_average_time: 0.56736s);(grad_norm_mean: nan, grad_norm_step: 3.07343) +2023-10-26 13:00:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2320, lr:1.482986e-05, step_mean_loss:0.01095501147210598, average_loss:0.09192375773191402), time, (train_step_time: 0.78798s, train_average_time: 0.56743s);(grad_norm_mean: nan, grad_norm_step: 0.74734) +2023-10-26 13:00:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 7, steps: 2340, lr:1.472689e-05, step_mean_loss:0.0022446131333708763, average_loss:0.09132361425019445), time, (train_step_time: 0.61905s, train_average_time: 0.56750s);(grad_norm_mean: nan, grad_norm_step: 0.27404) +2023-10-26 13:00:25 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 7, steps: 2344); +2023-10-26 13:00:29 INFO root donut_experiment.py:62 - batch inference time:2.8983936309814453 s +2023-10-26 13:00:32 INFO root donut_experiment.py:62 - batch inference time:2.3577377796173096 s +2023-10-26 13:00:33 INFO root donut_experiment.py:62 - batch inference time:1.3385779857635498 s +2023-10-26 13:00:34 INFO root donut_experiment.py:62 - batch inference time:1.0386466979980469 s +2023-10-26 13:00:36 INFO root donut_experiment.py:62 - batch inference time:1.9057793617248535 s +2023-10-26 13:00:37 INFO root donut_experiment.py:62 - batch inference time:1.1132779121398926 s +2023-10-26 13:00:38 INFO root donut_experiment.py:62 - batch inference time:0.9102699756622314 s +2023-10-26 13:00:40 INFO root donut_experiment.py:62 - batch inference time:1.5302984714508057 s +2023-10-26 13:00:41 INFO root donut_experiment.py:62 - batch inference time:1.4125299453735352 s +2023-10-26 13:00:43 INFO root donut_experiment.py:62 - batch inference time:1.9761724472045898 s +2023-10-26 13:00:45 INFO root donut_experiment.py:62 - batch inference time:2.1192522048950195 s +2023-10-26 13:00:46 INFO root donut_experiment.py:62 - batch inference time:0.7908997535705566 s +2023-10-26 13:00:48 INFO root donut_experiment.py:62 - batch inference time:1.8877999782562256 s +2023-10-26 13:00:49 INFO root donut_experiment.py:62 - batch inference time:1.5012505054473877 s +2023-10-26 13:00:51 INFO root donut_experiment.py:62 - batch inference time:1.88533616065979 s +2023-10-26 13:00:53 INFO root donut_experiment.py:62 - batch inference time:1.51593017578125 s +2023-10-26 13:00:55 INFO root donut_experiment.py:62 - batch inference time:1.8448083400726318 s +2023-10-26 13:00:56 INFO root donut_experiment.py:62 - batch inference time:1.6011810302734375 s +2023-10-26 13:00:57 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:00:57 INFO root donut_experiment.py:72 - token_acc: 0.27340359578425294; edit_dis: 0.1293759260759491 +2023-10-26 13:01:02 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch7_step2344_lr1.470621e-05_avg_loss0.09129_token_acc0.27340_edit_dis0.12938.pth +2023-10-26 13:01:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2360, lr:1.462326e-05, step_mean_loss:0.02678653970360756, average_loss:0.09080907938587805), time, (train_step_time: 0.58433s, train_average_time: 0.56777s);(grad_norm_mean: nan, grad_norm_step: 1.17337) +2023-10-26 13:01:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2380, lr:1.451900e-05, step_mean_loss:0.10223697125911713, average_loss:0.09020944978438827), time, (train_step_time: 0.54736s, train_average_time: 0.56792s);(grad_norm_mean: nan, grad_norm_step: 13.14273) +2023-10-26 13:01:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2400, lr:1.441412e-05, step_mean_loss:0.0009999717585742474, average_loss:0.08964507591925819), time, (train_step_time: 0.54145s, train_average_time: 0.56795s);(grad_norm_mean: nan, grad_norm_step: 0.05724) +2023-10-26 13:01:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2420, lr:1.430864e-05, step_mean_loss:0.02174186334013939, average_loss:0.08910470174675185), time, (train_step_time: 0.52701s, train_average_time: 0.56787s);(grad_norm_mean: nan, grad_norm_step: 3.02703) +2023-10-26 13:01:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2440, lr:1.420256e-05, step_mean_loss:0.0034209026489406824, average_loss:0.088561805105797), time, (train_step_time: 0.57727s, train_average_time: 0.56781s);(grad_norm_mean: nan, grad_norm_step: 0.28736) +2023-10-26 13:02:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2460, lr:1.409590e-05, step_mean_loss:0.012164601124823093, average_loss:0.08803938414889782), time, (train_step_time: 0.61258s, train_average_time: 0.56798s);(grad_norm_mean: nan, grad_norm_step: 0.74961) +2023-10-26 13:02:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2480, lr:1.398869e-05, step_mean_loss:0.009265833534300327, average_loss:0.08755173133569417), time, (train_step_time: 0.54250s, train_average_time: 0.56826s);(grad_norm_mean: nan, grad_norm_step: 0.62847) +2023-10-26 13:02:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2500, lr:1.388092e-05, step_mean_loss:0.015432657673954964, average_loss:0.08697657684661099), time, (train_step_time: 0.61983s, train_average_time: 0.56840s);(grad_norm_mean: nan, grad_norm_step: 1.10122) +2023-10-26 13:02:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2520, lr:1.377262e-05, step_mean_loss:0.1467750370502472, average_loss:0.08644757994856812), time, (train_step_time: 0.60279s, train_average_time: 0.56873s);(grad_norm_mean: nan, grad_norm_step: 6.61349) +2023-10-26 13:02:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2540, lr:1.366380e-05, step_mean_loss:0.012484262697398663, average_loss:0.08588471312844584), time, (train_step_time: 0.60306s, train_average_time: 0.56907s);(grad_norm_mean: nan, grad_norm_step: 1.17074) +2023-10-26 13:03:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2560, lr:1.355448e-05, step_mean_loss:0.036601945757865906, average_loss:0.085370687031633), time, (train_step_time: 0.98316s, train_average_time: 0.57381s);(grad_norm_mean: nan, grad_norm_step: 1.36293) +2023-10-26 13:03:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2580, lr:1.344467e-05, step_mean_loss:0.021913450211286545, average_loss:0.08486336161897849), time, (train_step_time: 0.54062s, train_average_time: 0.57415s);(grad_norm_mean: nan, grad_norm_step: 3.05294) +2023-10-26 13:03:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2600, lr:1.333439e-05, step_mean_loss:0.015683496370911598, average_loss:0.08434403905066575), time, (train_step_time: 0.60179s, train_average_time: 0.57426s);(grad_norm_mean: nan, grad_norm_step: 0.94077) +2023-10-26 13:03:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 8, steps: 2620, lr:1.322364e-05, step_mean_loss:0.02852688543498516, average_loss:0.08385702896065256), time, (train_step_time: 0.54689s, train_average_time: 0.57410s);(grad_norm_mean: nan, grad_norm_step: 1.33350) +2023-10-26 13:04:06 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 8, steps: 2637); +2023-10-26 13:04:10 INFO root donut_experiment.py:62 - batch inference time:3.007889747619629 s +2023-10-26 13:04:13 INFO root donut_experiment.py:62 - batch inference time:2.4379284381866455 s +2023-10-26 13:04:14 INFO root donut_experiment.py:62 - batch inference time:1.5459911823272705 s +2023-10-26 13:04:15 INFO root donut_experiment.py:62 - batch inference time:1.0171539783477783 s +2023-10-26 13:04:17 INFO root donut_experiment.py:62 - batch inference time:1.928027629852295 s +2023-10-26 13:04:19 INFO root donut_experiment.py:62 - batch inference time:1.1556379795074463 s +2023-10-26 13:04:19 INFO root donut_experiment.py:62 - batch inference time:0.8299086093902588 s +2023-10-26 13:04:21 INFO root donut_experiment.py:62 - batch inference time:1.3371455669403076 s +2023-10-26 13:04:22 INFO root donut_experiment.py:62 - batch inference time:1.461329460144043 s +2023-10-26 13:04:33 INFO root donut_experiment.py:62 - batch inference time:10.296461343765259 s +2023-10-26 13:04:35 INFO root donut_experiment.py:62 - batch inference time:2.067692756652832 s +2023-10-26 13:04:35 INFO root donut_experiment.py:62 - batch inference time:0.7651040554046631 s +2023-10-26 13:04:37 INFO root donut_experiment.py:62 - batch inference time:1.9337785243988037 s +2023-10-26 13:04:39 INFO root donut_experiment.py:62 - batch inference time:1.3952021598815918 s +2023-10-26 13:04:41 INFO root donut_experiment.py:62 - batch inference time:1.8514294624328613 s +2023-10-26 13:04:42 INFO root donut_experiment.py:62 - batch inference time:1.403306484222412 s +2023-10-26 13:04:44 INFO root donut_experiment.py:62 - batch inference time:1.5745139122009277 s +2023-10-26 13:04:45 INFO root donut_experiment.py:62 - batch inference time:1.5342965126037598 s +2023-10-26 13:04:45 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:04:45 INFO root donut_experiment.py:72 - token_acc: 0.2939969214982042; edit_dis: 0.26604289511371293 +2023-10-26 13:04:51 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch8_step2637_lr1.312916e-05_avg_loss0.08344_token_acc0.29400_edit_dis0.26604.pth +2023-10-26 13:04:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2640, lr:1.311246e-05, step_mean_loss:0.03998997062444687, average_loss:0.08336957573389803), time, (train_step_time: 0.55867s, train_average_time: 0.57398s);(grad_norm_mean: nan, grad_norm_step: 2.06046) +2023-10-26 13:05:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2660, lr:1.300085e-05, step_mean_loss:0.002381787868216634, average_loss:0.0828740385812809), time, (train_step_time: 0.54587s, train_average_time: 0.57388s);(grad_norm_mean: nan, grad_norm_step: 0.65274) +2023-10-26 13:05:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2680, lr:1.288882e-05, step_mean_loss:0.0013704290613532066, average_loss:0.08235992996100348), time, (train_step_time: 0.56518s, train_average_time: 0.57392s);(grad_norm_mean: nan, grad_norm_step: 0.76793) +2023-10-26 13:05:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2700, lr:1.277640e-05, step_mean_loss:0.001486315974034369, average_loss:0.08196558863389429), time, (train_step_time: 0.60732s, train_average_time: 0.57389s);(grad_norm_mean: nan, grad_norm_step: 0.15913) +2023-10-26 13:05:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2720, lr:1.266360e-05, step_mean_loss:0.0022233272902667522, average_loss:0.0814783368796725), time, (train_step_time: 0.58184s, train_average_time: 0.57388s);(grad_norm_mean: nan, grad_norm_step: 0.21202) +2023-10-26 13:05:51 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2740, lr:1.255043e-05, step_mean_loss:0.004986312240362167, average_loss:0.08099260718727037), time, (train_step_time: 0.53967s, train_average_time: 0.57378s);(grad_norm_mean: nan, grad_norm_step: 0.44575) +2023-10-26 13:06:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2760, lr:1.243691e-05, step_mean_loss:0.022593505680561066, average_loss:0.08051412285478127), time, (train_step_time: 0.56071s, train_average_time: 0.57377s);(grad_norm_mean: nan, grad_norm_step: 1.61530) +2023-10-26 13:06:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2780, lr:1.232305e-05, step_mean_loss:0.04131129011511803, average_loss:0.08004715952149598), time, (train_step_time: 0.55064s, train_average_time: 0.57372s);(grad_norm_mean: nan, grad_norm_step: 3.20251) +2023-10-26 13:06:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2800, lr:1.220888e-05, step_mean_loss:0.02876238152384758, average_loss:0.07957155203483386), time, (train_step_time: 0.64469s, train_average_time: 0.57369s);(grad_norm_mean: nan, grad_norm_step: 1.20248) +2023-10-26 13:06:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2820, lr:1.209440e-05, step_mean_loss:0.00045191546087153256, average_loss:0.07926529418794591), time, (train_step_time: 0.62604s, train_average_time: 0.57377s);(grad_norm_mean: nan, grad_norm_step: 0.04515) +2023-10-26 13:06:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2840, lr:1.197964e-05, step_mean_loss:0.0918964222073555, average_loss:0.07885723692134876), time, (train_step_time: 0.56580s, train_average_time: 0.57376s);(grad_norm_mean: nan, grad_norm_step: 3.18181) +2023-10-26 13:06:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2860, lr:1.186460e-05, step_mean_loss:0.016197403892874718, average_loss:0.07845237380267583), time, (train_step_time: 0.56287s, train_average_time: 0.57361s);(grad_norm_mean: nan, grad_norm_step: 1.46776) +2023-10-26 13:07:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2880, lr:1.174931e-05, step_mean_loss:0.020301634445786476, average_loss:0.07796688721238196), time, (train_step_time: 0.58280s, train_average_time: 0.57349s);(grad_norm_mean: nan, grad_norm_step: 1.66705) +2023-10-26 13:07:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2900, lr:1.163377e-05, step_mean_loss:0.06487883627414703, average_loss:0.07754847187997727), time, (train_step_time: 0.51147s, train_average_time: 0.57343s);(grad_norm_mean: nan, grad_norm_step: 2.79383) +2023-10-26 13:07:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 9, steps: 2920, lr:1.151802e-05, step_mean_loss:0.024995312094688416, average_loss:0.07712344935284507), time, (train_step_time: 0.53562s, train_average_time: 0.57325s);(grad_norm_mean: nan, grad_norm_step: 0.88421) +2023-10-26 13:07:38 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 9, steps: 2930); +2023-10-26 13:07:42 INFO root donut_experiment.py:62 - batch inference time:2.9579479694366455 s +2023-10-26 13:07:44 INFO root donut_experiment.py:62 - batch inference time:2.3035619258880615 s +2023-10-26 13:07:45 INFO root donut_experiment.py:62 - batch inference time:1.3955881595611572 s +2023-10-26 13:07:46 INFO root donut_experiment.py:62 - batch inference time:1.016550064086914 s +2023-10-26 13:07:48 INFO root donut_experiment.py:62 - batch inference time:1.9312443733215332 s +2023-10-26 13:07:50 INFO root donut_experiment.py:62 - batch inference time:1.0504493713378906 s +2023-10-26 13:07:50 INFO root donut_experiment.py:62 - batch inference time:0.8172411918640137 s +2023-10-26 13:07:52 INFO root donut_experiment.py:62 - batch inference time:1.33341646194458 s +2023-10-26 13:07:53 INFO root donut_experiment.py:62 - batch inference time:1.5244014263153076 s +2023-10-26 13:07:55 INFO root donut_experiment.py:62 - batch inference time:2.001750946044922 s +2023-10-26 13:07:57 INFO root donut_experiment.py:62 - batch inference time:1.8003482818603516 s +2023-10-26 13:07:58 INFO root donut_experiment.py:62 - batch inference time:0.748835563659668 s +2023-10-26 13:08:00 INFO root donut_experiment.py:62 - batch inference time:1.8658132553100586 s +2023-10-26 13:08:01 INFO root donut_experiment.py:62 - batch inference time:1.472033977508545 s +2023-10-26 13:08:03 INFO root donut_experiment.py:62 - batch inference time:1.778874397277832 s +2023-10-26 13:08:05 INFO root donut_experiment.py:62 - batch inference time:1.4677774906158447 s +2023-10-26 13:08:06 INFO root donut_experiment.py:62 - batch inference time:1.6624155044555664 s +2023-10-26 13:08:08 INFO root donut_experiment.py:62 - batch inference time:1.5818688869476318 s +2023-10-26 13:08:08 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:08:08 INFO root donut_experiment.py:72 - token_acc: 0.3779874213836478; edit_dis: 0.1023371107071504 +2023-10-26 13:08:14 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch9_step2930_lr1.146006e-05_avg_loss0.07692_token_acc0.37799_edit_dis0.10234.pth +2023-10-26 13:08:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2940, lr:1.140205e-05, step_mean_loss:0.004456227645277977, average_loss:0.0766794002448781), time, (train_step_time: 0.53843s, train_average_time: 0.57309s);(grad_norm_mean: nan, grad_norm_step: 0.53863) +2023-10-26 13:08:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2960, lr:1.128589e-05, step_mean_loss:0.0036478713154792786, average_loss:0.07622649626947915), time, (train_step_time: 0.53128s, train_average_time: 0.57318s);(grad_norm_mean: nan, grad_norm_step: 0.69464) +2023-10-26 13:08:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 2980, lr:1.116955e-05, step_mean_loss:0.002474020002409816, average_loss:0.07578869401728029), time, (train_step_time: 0.53228s, train_average_time: 0.57318s);(grad_norm_mean: nan, grad_norm_step: 0.15298) +2023-10-26 13:08:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3000, lr:1.105306e-05, step_mean_loss:0.0215509794652462, average_loss:0.07539650147233382), time, (train_step_time: 0.60125s, train_average_time: 0.57305s);(grad_norm_mean: nan, grad_norm_step: 1.40490) +2023-10-26 13:08:54 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 10, steps: 3000); +2023-10-26 13:08:58 INFO root donut_experiment.py:62 - batch inference time:3.343324899673462 s +2023-10-26 13:09:01 INFO root donut_experiment.py:62 - batch inference time:2.373663902282715 s +2023-10-26 13:09:02 INFO root donut_experiment.py:62 - batch inference time:1.5317375659942627 s +2023-10-26 13:09:03 INFO root donut_experiment.py:62 - batch inference time:1.0890827178955078 s +2023-10-26 13:09:05 INFO root donut_experiment.py:62 - batch inference time:2.0016555786132812 s +2023-10-26 13:09:07 INFO root donut_experiment.py:62 - batch inference time:1.1348333358764648 s +2023-10-26 13:09:08 INFO root donut_experiment.py:62 - batch inference time:0.8810102939605713 s +2023-10-26 13:09:09 INFO root donut_experiment.py:62 - batch inference time:1.4018347263336182 s +2023-10-26 13:09:11 INFO root donut_experiment.py:62 - batch inference time:1.6115117073059082 s +2023-10-26 13:09:13 INFO root donut_experiment.py:62 - batch inference time:1.9763562679290771 s +2023-10-26 13:09:15 INFO root donut_experiment.py:62 - batch inference time:2.2113471031188965 s +2023-10-26 13:09:16 INFO root donut_experiment.py:62 - batch inference time:0.8216571807861328 s +2023-10-26 13:09:17 INFO root donut_experiment.py:62 - batch inference time:1.5631859302520752 s +2023-10-26 13:09:19 INFO root donut_experiment.py:62 - batch inference time:1.394623041152954 s +2023-10-26 13:09:21 INFO root donut_experiment.py:62 - batch inference time:2.1181206703186035 s +2023-10-26 13:09:22 INFO root donut_experiment.py:62 - batch inference time:1.480966329574585 s +2023-10-26 13:09:24 INFO root donut_experiment.py:62 - batch inference time:1.694610595703125 s +2023-10-26 13:09:26 INFO root donut_experiment.py:62 - batch inference time:1.6429944038391113 s +2023-10-26 13:09:26 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:09:26 INFO root donut_experiment.py:72 - token_acc: 0.3780300705737956; edit_dis: 0.13381840988807514 +2023-10-26 13:09:31 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch10_step3000_lr1.105306e-05_avg_loss0.07540_token_acc0.37803_edit_dis0.13382.pth +2023-10-26 13:09:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3020, lr:1.093641e-05, step_mean_loss:0.0004668873443733901, average_loss:0.0749693485092797), time, (train_step_time: 0.55599s, train_average_time: 0.57310s);(grad_norm_mean: nan, grad_norm_step: 0.04019) +2023-10-26 13:09:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3040, lr:1.081964e-05, step_mean_loss:0.021121181547641754, average_loss:0.07452595302272703), time, (train_step_time: 0.55865s, train_average_time: 0.57313s);(grad_norm_mean: nan, grad_norm_step: 1.90228) +2023-10-26 13:10:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3060, lr:1.070276e-05, step_mean_loss:0.012029904872179031, average_loss:0.07422968259136552), time, (train_step_time: 0.59498s, train_average_time: 0.57322s);(grad_norm_mean: nan, grad_norm_step: 1.29873) +2023-10-26 13:10:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3080, lr:1.058578e-05, step_mean_loss:0.004627993330359459, average_loss:0.0738488274198924), time, (train_step_time: 0.63796s, train_average_time: 0.57328s);(grad_norm_mean: nan, grad_norm_step: 0.52090) +2023-10-26 13:10:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3100, lr:1.046872e-05, step_mean_loss:0.0026798234321177006, average_loss:0.07348800714441055), time, (train_step_time: 0.54425s, train_average_time: 0.57321s);(grad_norm_mean: nan, grad_norm_step: 0.34023) +2023-10-26 13:10:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3120, lr:1.035160e-05, step_mean_loss:0.0024028776679188013, average_loss:0.07308366458557942), time, (train_step_time: 0.56118s, train_average_time: 0.57313s);(grad_norm_mean: nan, grad_norm_step: 0.19831) +2023-10-26 13:10:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3140, lr:1.023443e-05, step_mean_loss:0.0004800298484042287, average_loss:0.07269254335989703), time, (train_step_time: 0.53439s, train_average_time: 0.57318s);(grad_norm_mean: nan, grad_norm_step: 0.06906) +2023-10-26 13:11:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3160, lr:1.011722e-05, step_mean_loss:0.02795669622719288, average_loss:0.07229535520111335), time, (train_step_time: 0.57330s, train_average_time: 0.57310s);(grad_norm_mean: nan, grad_norm_step: 5.24508) +2023-10-26 13:11:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3180, lr:1.000000e-05, step_mean_loss:0.01726970262825489, average_loss:0.071974920117268), time, (train_step_time: 0.59074s, train_average_time: 0.57291s);(grad_norm_mean: nan, grad_norm_step: 2.77285) +2023-10-26 13:11:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3200, lr:9.882779e-06, step_mean_loss:0.012137112207710743, average_loss:0.07158986871733532), time, (train_step_time: 0.60272s, train_average_time: 0.57275s);(grad_norm_mean: nan, grad_norm_step: 1.74587) +2023-10-26 13:11:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 10, steps: 3220, lr:9.765574e-06, step_mean_loss:0.018800344318151474, average_loss:0.07120099812201706), time, (train_step_time: 0.52239s, train_average_time: 0.57272s);(grad_norm_mean: nan, grad_norm_step: 1.56632) +2023-10-26 13:11:38 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 10, steps: 3223); +2023-10-26 13:11:42 INFO root donut_experiment.py:62 - batch inference time:3.012986660003662 s +2023-10-26 13:11:45 INFO root donut_experiment.py:62 - batch inference time:2.318647623062134 s +2023-10-26 13:11:46 INFO root donut_experiment.py:62 - batch inference time:1.6481053829193115 s +2023-10-26 13:11:47 INFO root donut_experiment.py:62 - batch inference time:0.984992504119873 s +2023-10-26 13:11:49 INFO root donut_experiment.py:62 - batch inference time:1.8878326416015625 s +2023-10-26 13:11:50 INFO root donut_experiment.py:62 - batch inference time:1.0920016765594482 s +2023-10-26 13:11:51 INFO root donut_experiment.py:62 - batch inference time:0.8803489208221436 s +2023-10-26 13:11:52 INFO root donut_experiment.py:62 - batch inference time:1.3372077941894531 s +2023-10-26 13:11:54 INFO root donut_experiment.py:62 - batch inference time:1.3966970443725586 s +2023-10-26 13:11:56 INFO root donut_experiment.py:62 - batch inference time:2.1585521697998047 s +2023-10-26 13:11:58 INFO root donut_experiment.py:62 - batch inference time:1.8474400043487549 s +2023-10-26 13:11:59 INFO root donut_experiment.py:62 - batch inference time:0.8146357536315918 s +2023-10-26 13:12:00 INFO root donut_experiment.py:62 - batch inference time:1.3754265308380127 s +2023-10-26 13:12:02 INFO root donut_experiment.py:62 - batch inference time:1.4196093082427979 s +2023-10-26 13:12:04 INFO root donut_experiment.py:62 - batch inference time:1.937981128692627 s +2023-10-26 13:12:05 INFO root donut_experiment.py:62 - batch inference time:1.4805030822753906 s +2023-10-26 13:12:07 INFO root donut_experiment.py:62 - batch inference time:1.7858302593231201 s +2023-10-26 13:12:09 INFO root donut_experiment.py:62 - batch inference time:1.6401190757751465 s +2023-10-26 13:12:09 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:12:09 INFO root donut_experiment.py:72 - token_acc: 0.34139447236180903; edit_dis: 0.13201224896418304 +2023-10-26 13:12:15 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch10_step3223_lr9.747996e-06_avg_loss0.07115_token_acc0.34139_edit_dis0.13201.pth +2023-10-26 13:12:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3240, lr:9.648402e-06, step_mean_loss:0.0029014332685619593, average_loss:0.07082755637968799), time, (train_step_time: 0.53784s, train_average_time: 0.57270s);(grad_norm_mean: nan, grad_norm_step: 0.27401) +2023-10-26 13:12:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3260, lr:9.531277e-06, step_mean_loss:0.004396968521177769, average_loss:0.07044291110778543), time, (train_step_time: 0.54570s, train_average_time: 0.57248s);(grad_norm_mean: nan, grad_norm_step: 0.43349) +2023-10-26 13:12:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3280, lr:9.414217e-06, step_mean_loss:0.0004404119972605258, average_loss:0.07008825757810905), time, (train_step_time: 0.56337s, train_average_time: 0.57236s);(grad_norm_mean: nan, grad_norm_step: 0.04708) +2023-10-26 13:12:58 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3300, lr:9.297238e-06, step_mean_loss:0.01357495877891779, average_loss:0.06973974099218057), time, (train_step_time: 0.55105s, train_average_time: 0.57230s);(grad_norm_mean: nan, grad_norm_step: 1.55889) +2023-10-26 13:13:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3320, lr:9.180355e-06, step_mean_loss:0.008359353058040142, average_loss:0.06937239649472626), time, (train_step_time: 0.58455s, train_average_time: 0.57225s);(grad_norm_mean: nan, grad_norm_step: 3.12143) +2023-10-26 13:13:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3340, lr:9.063585e-06, step_mean_loss:0.0011023726547136903, average_loss:0.06906420950489883), time, (train_step_time: 0.57027s, train_average_time: 0.57224s);(grad_norm_mean: nan, grad_norm_step: 0.08666) +2023-10-26 13:13:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3360, lr:8.946944e-06, step_mean_loss:0.0026370610576123, average_loss:0.06875090841611685), time, (train_step_time: 0.52021s, train_average_time: 0.57219s);(grad_norm_mean: nan, grad_norm_step: 0.23804) +2023-10-26 13:13:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3380, lr:8.830447e-06, step_mean_loss:0.001174670411273837, average_loss:0.06836965204822597), time, (train_step_time: 0.52667s, train_average_time: 0.57222s);(grad_norm_mean: nan, grad_norm_step: 0.10525) +2023-10-26 13:13:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3400, lr:8.714111e-06, step_mean_loss:0.004157470539212227, average_loss:0.06803647973514992), time, (train_step_time: 0.58496s, train_average_time: 0.57234s);(grad_norm_mean: nan, grad_norm_step: 0.60844) +2023-10-26 13:14:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3420, lr:8.597951e-06, step_mean_loss:0.009623422287404537, average_loss:0.06770329292389309), time, (train_step_time: 0.57413s, train_average_time: 0.57224s);(grad_norm_mean: nan, grad_norm_step: 0.73925) +2023-10-26 13:14:18 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3440, lr:8.481985e-06, step_mean_loss:0.0005008400185033679, average_loss:0.06741051708950181), time, (train_step_time: 0.56205s, train_average_time: 0.57226s);(grad_norm_mean: nan, grad_norm_step: 0.07121) +2023-10-26 13:14:30 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3460, lr:8.366226e-06, step_mean_loss:0.0031018946319818497, average_loss:0.06710930017652361), time, (train_step_time: 0.55047s, train_average_time: 0.57222s);(grad_norm_mean: nan, grad_norm_step: 0.43406) +2023-10-26 13:14:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3480, lr:8.250693e-06, step_mean_loss:0.0030635534785687923, average_loss:0.06675194693940577), time, (train_step_time: 0.59271s, train_average_time: 0.57229s);(grad_norm_mean: nan, grad_norm_step: 0.23645) +2023-10-26 13:14:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 11, steps: 3500, lr:8.135399e-06, step_mean_loss:0.00034443908953107893, average_loss:0.06643232217906085), time, (train_step_time: 0.53540s, train_average_time: 0.57237s);(grad_norm_mean: nan, grad_norm_step: 0.02810) +2023-10-26 13:15:00 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 11, steps: 3516); +2023-10-26 13:15:04 INFO root donut_experiment.py:62 - batch inference time:3.291883945465088 s +2023-10-26 13:15:06 INFO root donut_experiment.py:62 - batch inference time:2.301196575164795 s +2023-10-26 13:15:08 INFO root donut_experiment.py:62 - batch inference time:1.7327885627746582 s +2023-10-26 13:15:09 INFO root donut_experiment.py:62 - batch inference time:1.191392183303833 s +2023-10-26 13:15:12 INFO root donut_experiment.py:62 - batch inference time:2.105736494064331 s +2023-10-26 13:15:13 INFO root donut_experiment.py:62 - batch inference time:1.1630067825317383 s +2023-10-26 13:15:14 INFO root donut_experiment.py:62 - batch inference time:0.8274266719818115 s +2023-10-26 13:15:15 INFO root donut_experiment.py:62 - batch inference time:1.3112468719482422 s +2023-10-26 13:15:16 INFO root donut_experiment.py:62 - batch inference time:1.4598462581634521 s +2023-10-26 13:15:18 INFO root donut_experiment.py:62 - batch inference time:2.0094499588012695 s +2023-10-26 13:15:20 INFO root donut_experiment.py:62 - batch inference time:1.9784679412841797 s +2023-10-26 13:15:21 INFO root donut_experiment.py:62 - batch inference time:0.7139852046966553 s +2023-10-26 13:15:23 INFO root donut_experiment.py:62 - batch inference time:1.8658106327056885 s +2023-10-26 13:15:25 INFO root donut_experiment.py:62 - batch inference time:1.5076217651367188 s +2023-10-26 13:15:27 INFO root donut_experiment.py:62 - batch inference time:2.058305263519287 s +2023-10-26 13:15:28 INFO root donut_experiment.py:62 - batch inference time:1.470266580581665 s +2023-10-26 13:15:30 INFO root donut_experiment.py:62 - batch inference time:1.9273662567138672 s +2023-10-26 13:15:32 INFO root donut_experiment.py:62 - batch inference time:1.6097538471221924 s +2023-10-26 13:15:32 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:15:32 INFO root donut_experiment.py:72 - token_acc: 0.3839727722772277; edit_dis: 0.11238898092869024 +2023-10-26 13:15:37 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch11_step3516_lr8.043349e-06_avg_loss0.06615_token_acc0.38397_edit_dis0.11239.pth +2023-10-26 13:15:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3520, lr:8.020362e-06, step_mean_loss:0.0006660326034761965, average_loss:0.06607857975307484), time, (train_step_time: 0.55543s, train_average_time: 0.57187s);(grad_norm_mean: nan, grad_norm_step: 0.08257) +2023-10-26 13:15:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3540, lr:7.905597e-06, step_mean_loss:0.004932323936372995, average_loss:0.06572323351154687), time, (train_step_time: 0.59229s, train_average_time: 0.57181s);(grad_norm_mean: nan, grad_norm_step: 1.65199) +2023-10-26 13:16:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3560, lr:7.791120e-06, step_mean_loss:0.011919808574020863, average_loss:0.06539335696906051), time, (train_step_time: 0.52696s, train_average_time: 0.57175s);(grad_norm_mean: nan, grad_norm_step: 0.60463) +2023-10-26 13:16:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3580, lr:7.676946e-06, step_mean_loss:0.007023095153272152, average_loss:0.06507801076039152), time, (train_step_time: 0.58171s, train_average_time: 0.57180s);(grad_norm_mean: nan, grad_norm_step: 1.25330) +2023-10-26 13:16:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3600, lr:7.563092e-06, step_mean_loss:0.0061339219100773335, average_loss:0.0647369691117294), time, (train_step_time: 0.55431s, train_average_time: 0.57179s);(grad_norm_mean: nan, grad_norm_step: 0.83088) +2023-10-26 13:16:37 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3620, lr:7.449572e-06, step_mean_loss:0.0021094614639878273, average_loss:0.06440129675110864), time, (train_step_time: 0.62499s, train_average_time: 0.57180s);(grad_norm_mean: nan, grad_norm_step: 0.31065) +2023-10-26 13:16:48 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3640, lr:7.336403e-06, step_mean_loss:0.00022897470626048744, average_loss:0.06406225836196507), time, (train_step_time: 0.53721s, train_average_time: 0.57170s);(grad_norm_mean: nan, grad_norm_step: 0.02329) +2023-10-26 13:17:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3660, lr:7.223600e-06, step_mean_loss:0.0033023275900632143, average_loss:0.06381481796901366), time, (train_step_time: 0.54505s, train_average_time: 0.57165s);(grad_norm_mean: nan, grad_norm_step: 0.33420) +2023-10-26 13:17:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3680, lr:7.111178e-06, step_mean_loss:0.00533125177025795, average_loss:0.06360231644802565), time, (train_step_time: 0.54041s, train_average_time: 0.57152s);(grad_norm_mean: nan, grad_norm_step: 1.54284) +2023-10-26 13:17:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3700, lr:6.999154e-06, step_mean_loss:0.0025655420031398535, average_loss:0.06330286654282817), time, (train_step_time: 0.59684s, train_average_time: 0.57149s);(grad_norm_mean: nan, grad_norm_step: 0.24766) +2023-10-26 13:17:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3720, lr:6.887541e-06, step_mean_loss:0.017182325944304466, average_loss:0.06300650878300379), time, (train_step_time: 0.54748s, train_average_time: 0.57148s);(grad_norm_mean: nan, grad_norm_step: 1.99331) +2023-10-26 13:17:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3740, lr:6.776357e-06, step_mean_loss:0.0004329663061071187, average_loss:0.06271181396888743), time, (train_step_time: 0.57034s, train_average_time: 0.57141s);(grad_norm_mean: nan, grad_norm_step: 0.04162) +2023-10-26 13:17:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3760, lr:6.665615e-06, step_mean_loss:0.001022542011924088, average_loss:0.06247433995231936), time, (train_step_time: 0.54440s, train_average_time: 0.57140s);(grad_norm_mean: nan, grad_norm_step: 0.17965) +2023-10-26 13:18:07 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3780, lr:6.555331e-06, step_mean_loss:0.00012299664376769215, average_loss:0.06215645562437607), time, (train_step_time: 0.55194s, train_average_time: 0.57127s);(grad_norm_mean: nan, grad_norm_step: 0.01203) +2023-10-26 13:18:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 12, steps: 3800, lr:6.445521e-06, step_mean_loss:0.0003426220209803432, average_loss:0.06189097085275726), time, (train_step_time: 0.56675s, train_average_time: 0.57131s);(grad_norm_mean: nan, grad_norm_step: 0.02945) +2023-10-26 13:18:24 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 12, steps: 3809); +2023-10-26 13:18:27 INFO root donut_experiment.py:62 - batch inference time:2.924800395965576 s +2023-10-26 13:18:30 INFO root donut_experiment.py:62 - batch inference time:2.3703176975250244 s +2023-10-26 13:18:31 INFO root donut_experiment.py:62 - batch inference time:1.4256820678710938 s +2023-10-26 13:18:32 INFO root donut_experiment.py:62 - batch inference time:0.972344160079956 s +2023-10-26 13:18:34 INFO root donut_experiment.py:62 - batch inference time:2.0905675888061523 s +2023-10-26 13:18:35 INFO root donut_experiment.py:62 - batch inference time:1.1347706317901611 s +2023-10-26 13:18:36 INFO root donut_experiment.py:62 - batch inference time:0.8568391799926758 s +2023-10-26 13:18:38 INFO root donut_experiment.py:62 - batch inference time:1.3554465770721436 s +2023-10-26 13:18:39 INFO root donut_experiment.py:62 - batch inference time:1.43843674659729 s +2023-10-26 13:18:41 INFO root donut_experiment.py:62 - batch inference time:2.036109447479248 s +2023-10-26 13:18:43 INFO root donut_experiment.py:62 - batch inference time:2.22918438911438 s +2023-10-26 13:18:44 INFO root donut_experiment.py:62 - batch inference time:0.7815053462982178 s +2023-10-26 13:18:46 INFO root donut_experiment.py:62 - batch inference time:1.9063525199890137 s +2023-10-26 13:18:48 INFO root donut_experiment.py:62 - batch inference time:1.4830505847930908 s +2023-10-26 13:18:50 INFO root donut_experiment.py:62 - batch inference time:1.8708555698394775 s +2023-10-26 13:18:51 INFO root donut_experiment.py:62 - batch inference time:1.448408603668213 s +2023-10-26 13:18:53 INFO root donut_experiment.py:62 - batch inference time:1.7121946811676025 s +2023-10-26 13:18:54 INFO root donut_experiment.py:62 - batch inference time:1.6619634628295898 s +2023-10-26 13:18:55 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:18:55 INFO root donut_experiment.py:72 - token_acc: 0.3380106018085438; edit_dis: 0.11137917910008234 +2023-10-26 13:19:00 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch12_step3809_lr6.396265e-06_avg_loss0.06175_token_acc0.33801_edit_dis0.11138.pth +2023-10-26 13:19:08 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3820, lr:6.336200e-06, step_mean_loss:0.00026331478147767484, average_loss:0.0615877073165674), time, (train_step_time: 0.52662s, train_average_time: 0.57123s);(grad_norm_mean: nan, grad_norm_step: 0.03684) +2023-10-26 13:19:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3840, lr:6.227381e-06, step_mean_loss:0.0009253086755052209, average_loss:0.06127817895050356), time, (train_step_time: 0.55839s, train_average_time: 0.57122s);(grad_norm_mean: nan, grad_norm_step: 0.10659) +2023-10-26 13:19:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3860, lr:6.119081e-06, step_mean_loss:0.06874058395624161, average_loss:0.061090815451748805), time, (train_step_time: 0.68634s, train_average_time: 0.57129s);(grad_norm_mean: nan, grad_norm_step: 2.47000) +2023-10-26 13:19:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3880, lr:6.011315e-06, step_mean_loss:0.004116719122976065, average_loss:0.060805785233769616), time, (train_step_time: 0.62000s, train_average_time: 0.57123s);(grad_norm_mean: nan, grad_norm_step: 0.46900) +2023-10-26 13:19:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3900, lr:5.904096e-06, step_mean_loss:0.005375952459871769, average_loss:0.0605317548820914), time, (train_step_time: 0.56820s, train_average_time: 0.57122s);(grad_norm_mean: nan, grad_norm_step: 0.76577) +2023-10-26 13:20:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3920, lr:5.797441e-06, step_mean_loss:0.009527713991701603, average_loss:0.06025964822277747), time, (train_step_time: 0.56619s, train_average_time: 0.57127s);(grad_norm_mean: nan, grad_norm_step: 0.75761) +2023-10-26 13:20:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3940, lr:5.691363e-06, step_mean_loss:0.0014461871469393373, average_loss:0.05998037198073666), time, (train_step_time: 0.55665s, train_average_time: 0.57116s);(grad_norm_mean: nan, grad_norm_step: 0.18067) +2023-10-26 13:20:27 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3960, lr:5.585876e-06, step_mean_loss:0.0032409511040896177, average_loss:0.05969490514367229), time, (train_step_time: 0.58984s, train_average_time: 0.57113s);(grad_norm_mean: nan, grad_norm_step: 0.73362) +2023-10-26 13:20:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 3980, lr:5.480997e-06, step_mean_loss:0.059092190116643906, average_loss:0.05942838735095233), time, (train_step_time: 0.52436s, train_average_time: 0.57109s);(grad_norm_mean: nan, grad_norm_step: 3.93609) +2023-10-26 13:20:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4000, lr:5.376738e-06, step_mean_loss:0.002016334794461727, average_loss:0.05916068642682376), time, (train_step_time: 0.64210s, train_average_time: 0.57107s);(grad_norm_mean: nan, grad_norm_step: 0.30605) +2023-10-26 13:20:50 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 13, steps: 4000); +2023-10-26 13:20:54 INFO root donut_experiment.py:62 - batch inference time:3.132763385772705 s +2023-10-26 13:20:57 INFO root donut_experiment.py:62 - batch inference time:2.693145513534546 s +2023-10-26 13:20:58 INFO root donut_experiment.py:62 - batch inference time:1.410940170288086 s +2023-10-26 13:20:59 INFO root donut_experiment.py:62 - batch inference time:1.0445411205291748 s +2023-10-26 13:21:01 INFO root donut_experiment.py:62 - batch inference time:1.8585216999053955 s +2023-10-26 13:21:02 INFO root donut_experiment.py:62 - batch inference time:1.0672850608825684 s +2023-10-26 13:21:03 INFO root donut_experiment.py:62 - batch inference time:0.8101487159729004 s +2023-10-26 13:21:04 INFO root donut_experiment.py:62 - batch inference time:1.3401949405670166 s +2023-10-26 13:21:06 INFO root donut_experiment.py:62 - batch inference time:1.3632874488830566 s +2023-10-26 13:21:08 INFO root donut_experiment.py:62 - batch inference time:1.9815993309020996 s +2023-10-26 13:21:10 INFO root donut_experiment.py:62 - batch inference time:2.241211175918579 s +2023-10-26 13:21:11 INFO root donut_experiment.py:62 - batch inference time:0.7667906284332275 s +2023-10-26 13:21:13 INFO root donut_experiment.py:62 - batch inference time:1.853360891342163 s +2023-10-26 13:21:14 INFO root donut_experiment.py:62 - batch inference time:1.5090179443359375 s +2023-10-26 13:21:16 INFO root donut_experiment.py:62 - batch inference time:1.856658697128296 s +2023-10-26 13:21:18 INFO root donut_experiment.py:62 - batch inference time:1.4386894702911377 s +2023-10-26 13:21:19 INFO root donut_experiment.py:62 - batch inference time:1.8648512363433838 s +2023-10-26 13:21:21 INFO root donut_experiment.py:62 - batch inference time:1.5852830410003662 s +2023-10-26 13:21:21 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:21:21 INFO root donut_experiment.py:72 - token_acc: 0.2972805933250927; edit_dis: 0.10380015809980783 +2023-10-26 13:21:27 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch13_step4000_lr5.376738e-06_avg_loss0.05916_token_acc0.29728_edit_dis0.10380.pth +2023-10-26 13:21:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4020, lr:5.273115e-06, step_mean_loss:0.000933996809180826, average_loss:0.05892530819406439), time, (train_step_time: 0.53799s, train_average_time: 0.57104s);(grad_norm_mean: nan, grad_norm_step: 0.09274) +2023-10-26 13:21:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4040, lr:5.170141e-06, step_mean_loss:0.002894126810133457, average_loss:0.058666863507747025), time, (train_step_time: 0.57559s, train_average_time: 0.57102s);(grad_norm_mean: nan, grad_norm_step: 0.54754) +2023-10-26 13:22:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4060, lr:5.067831e-06, step_mean_loss:0.01009666919708252, average_loss:0.05844456949769871), time, (train_step_time: 0.55098s, train_average_time: 0.57098s);(grad_norm_mean: nan, grad_norm_step: 1.29718) +2023-10-26 13:22:12 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4080, lr:4.966199e-06, step_mean_loss:0.00038847854011692107, average_loss:0.05818922849511601), time, (train_step_time: 0.56630s, train_average_time: 0.57088s);(grad_norm_mean: nan, grad_norm_step: 0.04318) +2023-10-26 13:22:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 13, steps: 4100, lr:4.865258e-06, step_mean_loss:0.09832204878330231, average_loss:0.0579465829204487), time, (train_step_time: 0.53742s, train_average_time: 0.57082s);(grad_norm_mean: nan, grad_norm_step: 6.68861) +2023-10-26 13:22:24 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 13, steps: 4102); +2023-10-26 13:22:28 INFO root donut_experiment.py:62 - batch inference time:3.0880608558654785 s +2023-10-26 13:22:30 INFO root donut_experiment.py:62 - batch inference time:2.3459675312042236 s +2023-10-26 13:22:32 INFO root donut_experiment.py:62 - batch inference time:1.5336627960205078 s +2023-10-26 13:22:33 INFO root donut_experiment.py:62 - batch inference time:1.0263667106628418 s +2023-10-26 13:22:35 INFO root donut_experiment.py:62 - batch inference time:1.8465666770935059 s +2023-10-26 13:22:36 INFO root donut_experiment.py:62 - batch inference time:1.081799030303955 s +2023-10-26 13:22:37 INFO root donut_experiment.py:62 - batch inference time:0.9328756332397461 s +2023-10-26 13:22:38 INFO root donut_experiment.py:62 - batch inference time:1.3627495765686035 s +2023-10-26 13:22:40 INFO root donut_experiment.py:62 - batch inference time:1.4755034446716309 s +2023-10-26 13:22:42 INFO root donut_experiment.py:62 - batch inference time:2.0486929416656494 s +2023-10-26 13:22:44 INFO root donut_experiment.py:62 - batch inference time:1.9190998077392578 s +2023-10-26 13:22:45 INFO root donut_experiment.py:62 - batch inference time:0.7943637371063232 s +2023-10-26 13:22:46 INFO root donut_experiment.py:62 - batch inference time:1.6782758235931396 s +2023-10-26 13:22:48 INFO root donut_experiment.py:62 - batch inference time:1.614283561706543 s +2023-10-26 13:22:50 INFO root donut_experiment.py:62 - batch inference time:1.82420015335083 s +2023-10-26 13:22:51 INFO root donut_experiment.py:62 - batch inference time:1.4724235534667969 s +2023-10-26 13:22:53 INFO root donut_experiment.py:62 - batch inference time:1.8756253719329834 s +2023-10-26 13:22:55 INFO root donut_experiment.py:62 - batch inference time:1.6244468688964844 s +2023-10-26 13:22:55 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:22:55 INFO root donut_experiment.py:72 - token_acc: 0.36812323168814837; edit_dis: 0.10428097528438467 +2023-10-26 13:23:00 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch13_step4102_lr4.855203e-06_avg_loss0.05792_token_acc0.36812_edit_dis0.10428.pth +2023-10-26 13:23:12 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4120, lr:4.765023e-06, step_mean_loss:0.02132527157664299, average_loss:0.05769532672805644), time, (train_step_time: 0.57360s, train_average_time: 0.57078s);(grad_norm_mean: nan, grad_norm_step: 0.51077) +2023-10-26 13:23:23 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4140, lr:4.665507e-06, step_mean_loss:0.0028076472226530313, average_loss:0.05742748405363039), time, (train_step_time: 0.55072s, train_average_time: 0.57070s);(grad_norm_mean: nan, grad_norm_step: 0.43602) +2023-10-26 13:23:34 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4160, lr:4.566725e-06, step_mean_loss:0.0006128247478045523, average_loss:0.05716332424067726), time, (train_step_time: 0.55389s, train_average_time: 0.57068s);(grad_norm_mean: nan, grad_norm_step: 0.03991) +2023-10-26 13:23:45 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4180, lr:4.468688e-06, step_mean_loss:0.0005328917177394032, average_loss:0.05689882940536223), time, (train_step_time: 0.56420s, train_average_time: 0.57068s);(grad_norm_mean: nan, grad_norm_step: 0.04062) +2023-10-26 13:23:57 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4200, lr:4.371412e-06, step_mean_loss:0.03369211032986641, average_loss:0.05664914648802031), time, (train_step_time: 0.57380s, train_average_time: 0.57076s);(grad_norm_mean: nan, grad_norm_step: 1.57950) +2023-10-26 13:24:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4220, lr:4.274910e-06, step_mean_loss:0.0038696241099387407, average_loss:0.05645403299606135), time, (train_step_time: 0.57143s, train_average_time: 0.57083s);(grad_norm_mean: nan, grad_norm_step: 0.59846) +2023-10-26 13:24:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4240, lr:4.179194e-06, step_mean_loss:0.00028062573983334005, average_loss:0.05621109976120634), time, (train_step_time: 0.55887s, train_average_time: 0.57087s);(grad_norm_mean: nan, grad_norm_step: 0.02146) +2023-10-26 13:24:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4260, lr:4.084278e-06, step_mean_loss:0.0013261850690469146, average_loss:0.055971308836333396), time, (train_step_time: 0.56657s, train_average_time: 0.57094s);(grad_norm_mean: nan, grad_norm_step: 0.13510) +2023-10-26 13:24:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4280, lr:3.990175e-06, step_mean_loss:0.0026506215799599886, average_loss:0.055728211253244384), time, (train_step_time: 0.61715s, train_average_time: 0.57087s);(grad_norm_mean: nan, grad_norm_step: 0.32342) +2023-10-26 13:24:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4300, lr:3.896897e-06, step_mean_loss:0.012818671762943268, average_loss:0.055487025752963534), time, (train_step_time: 0.70019s, train_average_time: 0.57087s);(grad_norm_mean: nan, grad_norm_step: 1.31231) +2023-10-26 13:25:06 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4320, lr:3.804459e-06, step_mean_loss:0.010021056048572063, average_loss:0.055241440843457), time, (train_step_time: 0.64849s, train_average_time: 0.57079s);(grad_norm_mean: nan, grad_norm_step: 0.86550) +2023-10-26 13:25:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4340, lr:3.712871e-06, step_mean_loss:0.00029962541884742677, average_loss:0.05500629816103947), time, (train_step_time: 0.56656s, train_average_time: 0.57074s);(grad_norm_mean: nan, grad_norm_step: 0.02739) +2023-10-26 13:25:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4360, lr:3.622148e-06, step_mean_loss:0.0016676427330821753, average_loss:0.05478759355474021), time, (train_step_time: 0.56590s, train_average_time: 0.57073s);(grad_norm_mean: nan, grad_norm_step: 0.21730) +2023-10-26 13:25:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 14, steps: 4380, lr:3.532301e-06, step_mean_loss:0.009133896790444851, average_loss:0.054580231058439725), time, (train_step_time: 0.64718s, train_average_time: 0.57073s);(grad_norm_mean: nan, grad_norm_step: 0.83807) +2023-10-26 13:25:49 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 14, steps: 4395); +2023-10-26 13:25:53 INFO root donut_experiment.py:62 - batch inference time:3.087968111038208 s +2023-10-26 13:25:55 INFO root donut_experiment.py:62 - batch inference time:2.5017261505126953 s +2023-10-26 13:25:57 INFO root donut_experiment.py:62 - batch inference time:1.478849172592163 s +2023-10-26 13:25:58 INFO root donut_experiment.py:62 - batch inference time:1.0689716339111328 s +2023-10-26 13:26:00 INFO root donut_experiment.py:62 - batch inference time:1.909209966659546 s +2023-10-26 13:26:01 INFO root donut_experiment.py:62 - batch inference time:1.0728774070739746 s +2023-10-26 13:26:02 INFO root donut_experiment.py:62 - batch inference time:0.8443808555603027 s +2023-10-26 13:26:03 INFO root donut_experiment.py:62 - batch inference time:1.3022701740264893 s +2023-10-26 13:26:04 INFO root donut_experiment.py:62 - batch inference time:1.3730030059814453 s +2023-10-26 13:26:06 INFO root donut_experiment.py:62 - batch inference time:2.0529284477233887 s +2023-10-26 13:26:08 INFO root donut_experiment.py:62 - batch inference time:1.9497709274291992 s +2023-10-26 13:26:09 INFO root donut_experiment.py:62 - batch inference time:0.7762563228607178 s +2023-10-26 13:26:11 INFO root donut_experiment.py:62 - batch inference time:1.878652572631836 s +2023-10-26 13:26:13 INFO root donut_experiment.py:62 - batch inference time:1.5200779438018799 s +2023-10-26 13:26:15 INFO root donut_experiment.py:62 - batch inference time:1.9361746311187744 s +2023-10-26 13:26:16 INFO root donut_experiment.py:62 - batch inference time:1.44045090675354 s +2023-10-26 13:26:18 INFO root donut_experiment.py:62 - batch inference time:1.741135835647583 s +2023-10-26 13:26:19 INFO root donut_experiment.py:62 - batch inference time:1.6191909313201904 s +2023-10-26 13:26:20 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:26:20 INFO root donut_experiment.py:72 - token_acc: 0.3373869660118491; edit_dis: 0.12401796283640376 +2023-10-26 13:26:25 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch14_step4395_lr3.465498e-06_avg_loss0.05441_token_acc0.33739_edit_dis0.12402.pth +2023-10-26 13:26:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4400, lr:3.443343e-06, step_mean_loss:0.0031090814154595137, average_loss:0.05434919135519315), time, (train_step_time: 0.54632s, train_average_time: 0.57069s);(grad_norm_mean: nan, grad_norm_step: 0.36085) +2023-10-26 13:26:40 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4420, lr:3.355285e-06, step_mean_loss:0.0006275874329730868, average_loss:0.05411393778493476), time, (train_step_time: 0.58230s, train_average_time: 0.57063s);(grad_norm_mean: nan, grad_norm_step: 0.04029) +2023-10-26 13:26:52 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4440, lr:3.268141e-06, step_mean_loss:0.0013718653935939074, average_loss:0.053875123470388565), time, (train_step_time: 0.57902s, train_average_time: 0.57088s);(grad_norm_mean: nan, grad_norm_step: 0.33018) +2023-10-26 13:27:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4460, lr:3.181922e-06, step_mean_loss:0.002133592963218689, average_loss:0.05363843585944109), time, (train_step_time: 0.64395s, train_average_time: 0.57111s);(grad_norm_mean: nan, grad_norm_step: 0.44294) +2023-10-26 13:27:17 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4480, lr:3.096639e-06, step_mean_loss:0.00040566574898548424, average_loss:0.0535095283468207), time, (train_step_time: 0.60304s, train_average_time: 0.57131s);(grad_norm_mean: nan, grad_norm_step: 0.06835) +2023-10-26 13:27:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4500, lr:3.012306e-06, step_mean_loss:0.00020019582007080317, average_loss:0.05328315173456172), time, (train_step_time: 0.74017s, train_average_time: 0.57409s);(grad_norm_mean: nan, grad_norm_step: 0.04485) +2023-10-26 13:27:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4520, lr:2.928932e-06, step_mean_loss:0.0001415361912222579, average_loss:0.05308246646733233), time, (train_step_time: 0.58261s, train_average_time: 0.57413s);(grad_norm_mean: nan, grad_norm_step: 0.01239) +2023-10-26 13:28:04 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4540, lr:2.846530e-06, step_mean_loss:0.0004515747132245451, average_loss:0.05287167563274645), time, (train_step_time: 0.52428s, train_average_time: 0.57412s);(grad_norm_mean: nan, grad_norm_step: 0.05520) +2023-10-26 13:28:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4560, lr:2.765111e-06, step_mean_loss:0.00018757223733700812, average_loss:0.05266309185824651), time, (train_step_time: 0.52740s, train_average_time: 0.57415s);(grad_norm_mean: nan, grad_norm_step: 0.04066) +2023-10-26 13:28:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4580, lr:2.684687e-06, step_mean_loss:0.0004956265911459923, average_loss:0.05244614810130949), time, (train_step_time: 0.55546s, train_average_time: 0.57418s);(grad_norm_mean: nan, grad_norm_step: 0.07717) +2023-10-26 13:28:39 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4600, lr:2.605267e-06, step_mean_loss:0.00036205677315592766, average_loss:0.052289600420192234), time, (train_step_time: 0.53107s, train_average_time: 0.57408s);(grad_norm_mean: nan, grad_norm_step: 0.02483) +2023-10-26 13:28:50 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4620, lr:2.526864e-06, step_mean_loss:0.0010952854063361883, average_loss:0.05207967818090277), time, (train_step_time: 0.59117s, train_average_time: 0.57403s);(grad_norm_mean: nan, grad_norm_step: 0.11986) +2023-10-26 13:29:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4640, lr:2.449487e-06, step_mean_loss:0.000563957670237869, average_loss:0.051891758315075705), time, (train_step_time: 0.53330s, train_average_time: 0.57398s);(grad_norm_mean: nan, grad_norm_step: 0.06432) +2023-10-26 13:29:13 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4660, lr:2.373148e-06, step_mean_loss:0.0009241270017810166, average_loss:0.05167599244574227), time, (train_step_time: 0.56454s, train_average_time: 0.57395s);(grad_norm_mean: nan, grad_norm_step: 0.32988) +2023-10-26 13:29:24 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 15, steps: 4680, lr:2.297857e-06, step_mean_loss:0.0003385872987564653, average_loss:0.05146203320453882), time, (train_step_time: 0.54767s, train_average_time: 0.57390s);(grad_norm_mean: nan, grad_norm_step: 0.03302) +2023-10-26 13:29:29 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 15, steps: 4688); +2023-10-26 13:29:32 INFO root donut_experiment.py:62 - batch inference time:3.071977376937866 s +2023-10-26 13:29:35 INFO root donut_experiment.py:62 - batch inference time:2.5979385375976562 s +2023-10-26 13:29:36 INFO root donut_experiment.py:62 - batch inference time:1.3739328384399414 s +2023-10-26 13:29:37 INFO root donut_experiment.py:62 - batch inference time:0.9716205596923828 s +2023-10-26 13:29:39 INFO root donut_experiment.py:62 - batch inference time:1.8265659809112549 s +2023-10-26 13:29:40 INFO root donut_experiment.py:62 - batch inference time:1.0815486907958984 s +2023-10-26 13:29:41 INFO root donut_experiment.py:62 - batch inference time:0.8070268630981445 s +2023-10-26 13:29:43 INFO root donut_experiment.py:62 - batch inference time:1.3229866027832031 s +2023-10-26 13:29:44 INFO root donut_experiment.py:62 - batch inference time:1.3782005310058594 s +2023-10-26 13:29:46 INFO root donut_experiment.py:62 - batch inference time:2.0260491371154785 s +2023-10-26 13:29:48 INFO root donut_experiment.py:62 - batch inference time:2.1528313159942627 s +2023-10-26 13:29:49 INFO root donut_experiment.py:62 - batch inference time:0.7321276664733887 s +2023-10-26 13:29:51 INFO root donut_experiment.py:62 - batch inference time:1.8247840404510498 s +2023-10-26 13:29:52 INFO root donut_experiment.py:62 - batch inference time:1.4536406993865967 s +2023-10-26 13:29:54 INFO root donut_experiment.py:62 - batch inference time:1.9490208625793457 s +2023-10-26 13:29:56 INFO root donut_experiment.py:62 - batch inference time:1.47406005859375 s +2023-10-26 13:29:58 INFO root donut_experiment.py:62 - batch inference time:1.8554790019989014 s +2023-10-26 13:30:00 INFO root donut_experiment.py:62 - batch inference time:1.752809762954712 s +2023-10-26 13:30:00 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:30:00 INFO root donut_experiment.py:72 - token_acc: 0.3649111872857588; edit_dis: 0.10304554067244137 +2023-10-26 13:30:06 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch15_step4688_lr2.268037e-06_avg_loss0.05139_token_acc0.36491_edit_dis0.10305.pth +2023-10-26 13:30:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4700, lr:2.223625e-06, step_mean_loss:0.0020890538580715656, average_loss:0.05126993654264105), time, (train_step_time: 0.58784s, train_average_time: 0.57398s);(grad_norm_mean: nan, grad_norm_step: 0.35484) +2023-10-26 13:30:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4720, lr:2.150461e-06, step_mean_loss:0.00020643406605813652, average_loss:0.05105649122551806), time, (train_step_time: 0.64595s, train_average_time: 0.57413s);(grad_norm_mean: nan, grad_norm_step: 0.01625) +2023-10-26 13:30:44 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4740, lr:2.078375e-06, step_mean_loss:0.0008159033604897559, average_loss:0.05085149736105488), time, (train_step_time: 1.53498s, train_average_time: 0.57556s);(grad_norm_mean: nan, grad_norm_step: 0.07158) +2023-10-26 13:31:11 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4760, lr:2.007378e-06, step_mean_loss:0.18246902525424957, average_loss:0.050679433991858445), time, (train_step_time: 1.47907s, train_average_time: 0.57878s);(grad_norm_mean: nan, grad_norm_step: 14.40274) +2023-10-26 13:31:38 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4780, lr:1.937480e-06, step_mean_loss:0.025042252615094185, average_loss:0.050494247420348956), time, (train_step_time: 0.97384s, train_average_time: 0.58202s);(grad_norm_mean: nan, grad_norm_step: 1.97868) +2023-10-26 13:32:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4800, lr:1.868689e-06, step_mean_loss:0.0010975654004141688, average_loss:0.05029625297137348), time, (train_step_time: 1.20695s, train_average_time: 0.58516s);(grad_norm_mean: nan, grad_norm_step: 0.13833) +2023-10-26 13:32:33 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4820, lr:1.801016e-06, step_mean_loss:0.0005316334427334368, average_loss:0.05014638554051568), time, (train_step_time: 1.07079s, train_average_time: 0.58851s);(grad_norm_mean: nan, grad_norm_step: 0.13622) +2023-10-26 13:33:01 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4840, lr:1.734469e-06, step_mean_loss:0.010414570569992065, average_loss:0.049952870328693395), time, (train_step_time: 1.49660s, train_average_time: 0.59194s);(grad_norm_mean: nan, grad_norm_step: 1.94899) +2023-10-26 13:33:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4860, lr:1.669058e-06, step_mean_loss:7.089051359798759e-05, average_loss:0.04976117460239727), time, (train_step_time: 0.78993s, train_average_time: 0.59504s);(grad_norm_mean: nan, grad_norm_step: 0.00647) +2023-10-26 13:33:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4880, lr:1.604792e-06, step_mean_loss:0.0001263426529476419, average_loss:0.04957757232760726), time, (train_step_time: 1.31337s, train_average_time: 0.59809s);(grad_norm_mean: nan, grad_norm_step: 0.02260) +2023-10-26 13:34:22 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4900, lr:1.541679e-06, step_mean_loss:0.0006848910124972463, average_loss:0.04941553508319087), time, (train_step_time: 1.60167s, train_average_time: 0.60122s);(grad_norm_mean: nan, grad_norm_step: 0.08079) +2023-10-26 13:34:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4920, lr:1.479729e-06, step_mean_loss:0.0006110534886829555, average_loss:0.04922127740289538), time, (train_step_time: 1.50655s, train_average_time: 0.60428s);(grad_norm_mean: nan, grad_norm_step: 0.04101) +2023-10-26 13:35:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4940, lr:1.418950e-06, step_mean_loss:0.00019408948719501495, average_loss:0.04904542490498548), time, (train_step_time: 1.43284s, train_average_time: 0.60729s);(grad_norm_mean: nan, grad_norm_step: 0.02880) +2023-10-26 13:35:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4960, lr:1.359349e-06, step_mean_loss:0.0016400744207203388, average_loss:0.04887204519483358), time, (train_step_time: 1.30375s, train_average_time: 0.61024s);(grad_norm_mean: nan, grad_norm_step: 0.40688) +2023-10-26 13:36:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 16, steps: 4980, lr:1.300936e-06, step_mean_loss:0.0012982898624613881, average_loss:0.04872061553500879), time, (train_step_time: 1.42869s, train_average_time: 0.61311s);(grad_norm_mean: nan, grad_norm_step: 0.20923) +2023-10-26 13:36:11 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 16, steps: 4981); +2023-10-26 13:36:16 INFO root donut_experiment.py:62 - batch inference time:3.715996265411377 s +2023-10-26 13:36:18 INFO root donut_experiment.py:62 - batch inference time:2.729652166366577 s +2023-10-26 13:36:20 INFO root donut_experiment.py:62 - batch inference time:1.6375398635864258 s +2023-10-26 13:36:21 INFO root donut_experiment.py:62 - batch inference time:1.2028846740722656 s +2023-10-26 13:36:24 INFO root donut_experiment.py:62 - batch inference time:2.218625068664551 s +2023-10-26 13:36:25 INFO root donut_experiment.py:62 - batch inference time:1.37677001953125 s +2023-10-26 13:36:26 INFO root donut_experiment.py:62 - batch inference time:1.0473077297210693 s +2023-10-26 13:36:28 INFO root donut_experiment.py:62 - batch inference time:1.5797996520996094 s +2023-10-26 13:36:29 INFO root donut_experiment.py:62 - batch inference time:1.6795058250427246 s +2023-10-26 13:36:32 INFO root donut_experiment.py:62 - batch inference time:2.3424999713897705 s +2023-10-26 13:36:34 INFO root donut_experiment.py:62 - batch inference time:2.407202959060669 s +2023-10-26 13:36:35 INFO root donut_experiment.py:62 - batch inference time:0.8718287944793701 s +2023-10-26 13:36:37 INFO root donut_experiment.py:62 - batch inference time:2.174842119216919 s +2023-10-26 13:36:39 INFO root donut_experiment.py:62 - batch inference time:1.8354191780090332 s +2023-10-26 13:36:41 INFO root donut_experiment.py:62 - batch inference time:2.1351876258850098 s +2023-10-26 13:36:43 INFO root donut_experiment.py:62 - batch inference time:1.7066168785095215 s +2023-10-26 13:36:45 INFO root donut_experiment.py:62 - batch inference time:1.955251932144165 s +2023-10-26 13:36:47 INFO root donut_experiment.py:62 - batch inference time:1.8295657634735107 s +2023-10-26 13:36:47 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:36:47 INFO root donut_experiment.py:72 - token_acc: 0.3605843953994405; edit_dis: 0.10430415257067227 +2023-10-26 13:36:52 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch16_step4981_lr1.298047e-06_avg_loss0.04871_token_acc0.36058_edit_dis0.10430.pth +2023-10-26 13:37:19 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5000, lr:1.243719e-06, step_mean_loss:0.0002249860408483073, average_loss:0.04854025230267071), time, (train_step_time: 0.83148s, train_average_time: 0.61612s);(grad_norm_mean: nan, grad_norm_step: 0.03399) +2023-10-26 13:37:19 INFO root donut_experiment.py:247 - experiment:nougat-base; eval, (epoch: 17, steps: 5000); +2023-10-26 13:37:24 INFO root donut_experiment.py:62 - batch inference time:3.4533605575561523 s +2023-10-26 13:37:26 INFO root donut_experiment.py:62 - batch inference time:2.708505153656006 s +2023-10-26 13:37:28 INFO root donut_experiment.py:62 - batch inference time:1.692082405090332 s +2023-10-26 13:37:29 INFO root donut_experiment.py:62 - batch inference time:1.190746784210205 s +2023-10-26 13:37:32 INFO root donut_experiment.py:62 - batch inference time:2.2221109867095947 s +2023-10-26 13:37:33 INFO root donut_experiment.py:62 - batch inference time:1.3055315017700195 s +2023-10-26 13:37:34 INFO root donut_experiment.py:62 - batch inference time:0.9742751121520996 s +2023-10-26 13:37:35 INFO root donut_experiment.py:62 - batch inference time:1.4906558990478516 s +2023-10-26 13:37:37 INFO root donut_experiment.py:62 - batch inference time:1.6711573600769043 s +2023-10-26 13:37:39 INFO root donut_experiment.py:62 - batch inference time:2.358745574951172 s +2023-10-26 13:37:42 INFO root donut_experiment.py:62 - batch inference time:2.5217857360839844 s +2023-10-26 13:37:43 INFO root donut_experiment.py:62 - batch inference time:0.9099891185760498 s +2023-10-26 13:37:45 INFO root donut_experiment.py:62 - batch inference time:2.099435329437256 s +2023-10-26 13:37:47 INFO root donut_experiment.py:62 - batch inference time:1.8358559608459473 s +2023-10-26 13:37:49 INFO root donut_experiment.py:62 - batch inference time:2.2174720764160156 s +2023-10-26 13:37:51 INFO root donut_experiment.py:62 - batch inference time:1.645432472229004 s +2023-10-26 13:37:53 INFO root donut_experiment.py:62 - batch inference time:2.024721384048462 s +2023-10-26 13:37:55 INFO root donut_experiment.py:62 - batch inference time:1.8395237922668457 s +2023-10-26 13:37:55 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:37:55 INFO root donut_experiment.py:72 - token_acc: 0.34049844236760124; edit_dis: 0.11494018306331191 +2023-10-26 13:38:01 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch17_step5000_lr1.243719e-06_avg_loss0.04854_token_acc0.34050_edit_dis0.11494.pth +2023-10-26 13:38:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5020, lr:1.187704e-06, step_mean_loss:0.0009944845223799348, average_loss:0.048382336109535386), time, (train_step_time: 1.09154s, train_average_time: 0.61910s);(grad_norm_mean: nan, grad_norm_step: 0.11113) +2023-10-26 13:38:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5040, lr:1.132901e-06, step_mean_loss:0.0007558423676528037, average_loss:0.04820055763381773), time, (train_step_time: 1.01259s, train_average_time: 0.62209s);(grad_norm_mean: nan, grad_norm_step: 0.14158) +2023-10-26 13:39:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5060, lr:1.079316e-06, step_mean_loss:0.0013701434945687652, average_loss:0.0480248412673527), time, (train_step_time: 1.47372s, train_average_time: 0.62473s);(grad_norm_mean: nan, grad_norm_step: 0.27835) +2023-10-26 13:39:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5080, lr:1.026956e-06, step_mean_loss:0.00026387281832285225, average_loss:0.047843286630531806), time, (train_step_time: 1.55091s, train_average_time: 0.62767s);(grad_norm_mean: nan, grad_norm_step: 0.03633) +2023-10-26 13:40:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5100, lr:9.758300e-07, step_mean_loss:0.00017637133714742959, average_loss:0.047687592917876935), time, (train_step_time: 1.46431s, train_average_time: 0.63047s);(grad_norm_mean: nan, grad_norm_step: 0.02026) +2023-10-26 13:40:42 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5120, lr:9.259438e-07, step_mean_loss:0.020609425380825996, average_loss:0.047522228646393216), time, (train_step_time: 0.98190s, train_average_time: 0.63321s);(grad_norm_mean: nan, grad_norm_step: 6.24288) +2023-10-26 13:41:09 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5140, lr:8.773045e-07, step_mean_loss:0.000384931277949363, average_loss:0.0473643814296115), time, (train_step_time: 1.51183s, train_average_time: 0.63602s);(grad_norm_mean: nan, grad_norm_step: 0.03343) +2023-10-26 13:41:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5160, lr:8.299187e-07, step_mean_loss:0.00018914150132331997, average_loss:0.047244315399694294), time, (train_step_time: 0.93798s, train_average_time: 0.63865s);(grad_norm_mean: nan, grad_norm_step: 0.02006) +2023-10-26 13:42:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5180, lr:7.837930e-07, step_mean_loss:0.00016894793952815235, average_loss:0.04707561276207547), time, (train_step_time: 1.42059s, train_average_time: 0.64153s);(grad_norm_mean: nan, grad_norm_step: 0.01132) +2023-10-26 13:42:29 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5200, lr:7.389337e-07, step_mean_loss:0.00047323337639681995, average_loss:0.046901059088571714), time, (train_step_time: 0.74501s, train_average_time: 0.64406s);(grad_norm_mean: nan, grad_norm_step: 0.03291) +2023-10-26 13:42:56 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5220, lr:6.953470e-07, step_mean_loss:0.009119275026023388, average_loss:0.046727729625062), time, (train_step_time: 1.62445s, train_average_time: 0.64671s);(grad_norm_mean: nan, grad_norm_step: 1.04822) +2023-10-26 13:43:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5240, lr:6.530389e-07, step_mean_loss:0.0004077806370332837, average_loss:0.046561498854829665), time, (train_step_time: 1.01917s, train_average_time: 0.64967s);(grad_norm_mean: nan, grad_norm_step: 0.03509) +2023-10-26 13:43:55 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 17, steps: 5260, lr:6.120152e-07, step_mean_loss:0.0016480233753100038, average_loss:0.04640774201577426), time, (train_step_time: 1.54660s, train_average_time: 0.65302s);(grad_norm_mean: nan, grad_norm_step: 0.27813) +2023-10-26 13:44:14 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 17, steps: 5274); +2023-10-26 13:44:18 INFO root donut_experiment.py:62 - batch inference time:3.2837886810302734 s +2023-10-26 13:44:21 INFO root donut_experiment.py:62 - batch inference time:2.686351776123047 s +2023-10-26 13:44:22 INFO root donut_experiment.py:62 - batch inference time:1.6818110942840576 s +2023-10-26 13:44:24 INFO root donut_experiment.py:62 - batch inference time:1.2461843490600586 s +2023-10-26 13:44:26 INFO root donut_experiment.py:62 - batch inference time:2.216259717941284 s +2023-10-26 13:44:27 INFO root donut_experiment.py:62 - batch inference time:1.3014554977416992 s +2023-10-26 13:44:28 INFO root donut_experiment.py:62 - batch inference time:0.9994277954101562 s +2023-10-26 13:44:30 INFO root donut_experiment.py:62 - batch inference time:1.5554027557373047 s +2023-10-26 13:44:31 INFO root donut_experiment.py:62 - batch inference time:1.6843969821929932 s +2023-10-26 13:44:34 INFO root donut_experiment.py:62 - batch inference time:2.3175902366638184 s +2023-10-26 13:44:36 INFO root donut_experiment.py:62 - batch inference time:2.4965922832489014 s +2023-10-26 13:44:37 INFO root donut_experiment.py:62 - batch inference time:0.8676924705505371 s +2023-10-26 13:44:39 INFO root donut_experiment.py:62 - batch inference time:2.142510414123535 s +2023-10-26 13:44:41 INFO root donut_experiment.py:62 - batch inference time:1.8705079555511475 s +2023-10-26 13:44:43 INFO root donut_experiment.py:62 - batch inference time:2.151343584060669 s +2023-10-26 13:44:45 INFO root donut_experiment.py:62 - batch inference time:1.706970453262329 s +2023-10-26 13:44:47 INFO root donut_experiment.py:62 - batch inference time:2.001699686050415 s +2023-10-26 13:44:49 INFO root donut_experiment.py:62 - batch inference time:1.897090196609497 s +2023-10-26 13:44:49 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:44:49 INFO root donut_experiment.py:72 - token_acc: 0.3993759750390016; edit_dis: 0.10093566449784645 +2023-10-26 13:44:55 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch17_step5274_lr5.840658e-07_avg_loss0.04629_token_acc0.39938_edit_dis0.10094.pth +2023-10-26 13:45:05 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5280, lr:5.722815e-07, step_mean_loss:0.00017766852397471666, average_loss:0.04625350672686044), time, (train_step_time: 1.55595s, train_average_time: 0.65563s);(grad_norm_mean: nan, grad_norm_step: 0.03810) +2023-10-26 13:45:32 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5300, lr:5.338432e-07, step_mean_loss:0.0051986780017614365, average_loss:0.04609646180720548), time, (train_step_time: 1.03661s, train_average_time: 0.65827s);(grad_norm_mean: nan, grad_norm_step: 1.22300) +2023-10-26 13:46:00 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5320, lr:4.967058e-07, step_mean_loss:0.0025013447739183903, average_loss:0.04593001515562363), time, (train_step_time: 1.43756s, train_average_time: 0.66102s);(grad_norm_mean: nan, grad_norm_step: 0.55664) +2023-10-26 13:46:26 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5340, lr:4.608742e-07, step_mean_loss:0.06390047073364258, average_loss:0.045774440026849494), time, (train_step_time: 1.22420s, train_average_time: 0.66355s);(grad_norm_mean: nan, grad_norm_step: 6.43715) +2023-10-26 13:46:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5360, lr:4.263534e-07, step_mean_loss:0.00036717511829920113, average_loss:0.04562830458942753), time, (train_step_time: 1.49834s, train_average_time: 0.66622s);(grad_norm_mean: nan, grad_norm_step: 0.05000) +2023-10-26 13:47:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5380, lr:3.931481e-07, step_mean_loss:0.01278397161513567, average_loss:0.045476985360019644), time, (train_step_time: 1.60091s, train_average_time: 0.66864s);(grad_norm_mean: nan, grad_norm_step: 2.52803) +2023-10-26 13:47:47 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5400, lr:3.612630e-07, step_mean_loss:0.0001357008732156828, average_loss:0.045319681836351974), time, (train_step_time: 1.40752s, train_average_time: 0.67112s);(grad_norm_mean: nan, grad_norm_step: 0.01535) +2023-10-26 13:48:14 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5420, lr:3.307023e-07, step_mean_loss:0.00010224759898846969, average_loss:0.04518038022174668), time, (train_step_time: 1.48841s, train_average_time: 0.67354s);(grad_norm_mean: nan, grad_norm_step: 0.01460) +2023-10-26 13:48:41 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5440, lr:3.014703e-07, step_mean_loss:0.0004498167254496366, average_loss:0.04504547505669587), time, (train_step_time: 1.41650s, train_average_time: 0.67601s);(grad_norm_mean: nan, grad_norm_step: 0.03909) +2023-10-26 13:49:08 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5460, lr:2.735709e-07, step_mean_loss:0.004730079788714647, average_loss:0.04489302833561861), time, (train_step_time: 1.46604s, train_average_time: 0.67854s);(grad_norm_mean: nan, grad_norm_step: 0.20560) +2023-10-26 13:49:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5480, lr:2.470082e-07, step_mean_loss:0.00025494693545624614, average_loss:0.04474284030557731), time, (train_step_time: 1.49037s, train_average_time: 0.68114s);(grad_norm_mean: nan, grad_norm_step: 0.01885) +2023-10-26 13:50:02 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5500, lr:2.217856e-07, step_mean_loss:0.00028867553919553757, average_loss:0.04460160871812001), time, (train_step_time: 0.88744s, train_average_time: 0.68340s);(grad_norm_mean: nan, grad_norm_step: 0.07174) +2023-10-26 13:50:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5520, lr:1.979066e-07, step_mean_loss:5.351034997147508e-05, average_loss:0.044457131083280386), time, (train_step_time: 1.42399s, train_average_time: 0.68565s);(grad_norm_mean: nan, grad_norm_step: 0.00483) +2023-10-26 13:50:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5540, lr:1.753746e-07, step_mean_loss:0.0008199093281291425, average_loss:0.044308338213398364), time, (train_step_time: 1.40865s, train_average_time: 0.68785s);(grad_norm_mean: nan, grad_norm_step: 0.10887) +2023-10-26 13:51:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 18, steps: 5560, lr:1.541926e-07, step_mean_loss:0.00019485254597384483, average_loss:0.04415500636137236), time, (train_step_time: 0.91812s, train_average_time: 0.69012s);(grad_norm_mean: nan, grad_norm_step: 0.02712) +2023-10-26 13:51:30 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 18, steps: 5567); +2023-10-26 13:51:34 INFO root donut_experiment.py:62 - batch inference time:3.5011062622070312 s +2023-10-26 13:51:37 INFO root donut_experiment.py:62 - batch inference time:2.784762382507324 s +2023-10-26 13:51:38 INFO root donut_experiment.py:62 - batch inference time:1.6120598316192627 s +2023-10-26 13:51:40 INFO root donut_experiment.py:62 - batch inference time:1.130298376083374 s +2023-10-26 13:51:42 INFO root donut_experiment.py:62 - batch inference time:2.1881937980651855 s +2023-10-26 13:51:43 INFO root donut_experiment.py:62 - batch inference time:1.2850439548492432 s +2023-10-26 13:51:44 INFO root donut_experiment.py:62 - batch inference time:0.9697444438934326 s +2023-10-26 13:51:46 INFO root donut_experiment.py:62 - batch inference time:1.5741267204284668 s +2023-10-26 13:51:47 INFO root donut_experiment.py:62 - batch inference time:1.6284911632537842 s +2023-10-26 13:51:50 INFO root donut_experiment.py:62 - batch inference time:2.333425283432007 s +2023-10-26 13:51:52 INFO root donut_experiment.py:62 - batch inference time:1.8698348999023438 s +2023-10-26 13:51:53 INFO root donut_experiment.py:62 - batch inference time:0.9146580696105957 s +2023-10-26 13:51:55 INFO root donut_experiment.py:62 - batch inference time:2.1505916118621826 s +2023-10-26 13:51:57 INFO root donut_experiment.py:62 - batch inference time:1.8410098552703857 s +2023-10-26 13:51:59 INFO root donut_experiment.py:62 - batch inference time:2.227038621902466 s +2023-10-26 13:52:01 INFO root donut_experiment.py:62 - batch inference time:1.702845811843872 s +2023-10-26 13:52:02 INFO root donut_experiment.py:62 - batch inference time:1.9150655269622803 s +2023-10-26 13:52:04 INFO root donut_experiment.py:62 - batch inference time:1.8324511051177979 s +2023-10-26 13:52:05 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:52:05 INFO root donut_experiment.py:72 - token_acc: 0.35526315789473684; edit_dis: 0.09688640393501749 +2023-10-26 13:52:10 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch18_step5567_lr1.470983e-07_avg_loss0.04410_token_acc0.35526_edit_dis0.09689.pth +2023-10-26 13:52:28 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5580, lr:1.343635e-07, step_mean_loss:0.0012552752159535885, average_loss:0.04400248852213693), time, (train_step_time: 1.41881s, train_average_time: 0.69244s);(grad_norm_mean: nan, grad_norm_step: 0.13436) +2023-10-26 13:52:54 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5600, lr:1.158901e-07, step_mean_loss:0.0002900006657000631, average_loss:0.04384983765990098), time, (train_step_time: 1.45865s, train_average_time: 0.69462s);(grad_norm_mean: nan, grad_norm_step: 0.02192) +2023-10-26 13:53:21 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5620, lr:9.877485e-08, step_mean_loss:0.0005735884769819677, average_loss:0.04370433910258577), time, (train_step_time: 1.49347s, train_average_time: 0.69694s);(grad_norm_mean: nan, grad_norm_step: 0.04788) +2023-10-26 13:53:49 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5640, lr:8.302018e-08, step_mean_loss:0.0006411934155039489, average_loss:0.04355465399368658), time, (train_step_time: 1.16916s, train_average_time: 0.69935s);(grad_norm_mean: nan, grad_norm_step: 0.04007) +2023-10-26 13:54:16 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5660, lr:6.862823e-08, step_mean_loss:0.0001265406608581543, average_loss:0.043407596732151124), time, (train_step_time: 1.43404s, train_average_time: 0.70164s);(grad_norm_mean: nan, grad_norm_step: 0.01044) +2023-10-26 13:54:43 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5680, lr:5.560096e-08, step_mean_loss:0.006310741417109966, average_loss:0.043267216413038996), time, (train_step_time: 1.49300s, train_average_time: 0.70394s);(grad_norm_mean: nan, grad_norm_step: 0.98115) +2023-10-26 13:55:10 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5700, lr:4.394018e-08, step_mean_loss:0.00023105421860236675, average_loss:0.04312413718069684), time, (train_step_time: 1.47566s, train_average_time: 0.70619s);(grad_norm_mean: nan, grad_norm_step: 0.03539) +2023-10-26 13:55:36 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5720, lr:3.364748e-08, step_mean_loss:0.001178677543066442, average_loss:0.04298269715832884), time, (train_step_time: 0.95393s, train_average_time: 0.70835s);(grad_norm_mean: nan, grad_norm_step: 0.13673) +2023-10-26 13:56:03 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5740, lr:2.472428e-08, step_mean_loss:0.00015921489102765918, average_loss:0.04284708375477625), time, (train_step_time: 1.07421s, train_average_time: 0.71054s);(grad_norm_mean: nan, grad_norm_step: 0.01693) +2023-10-26 13:56:31 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5760, lr:1.717180e-08, step_mean_loss:0.001310714171268046, average_loss:0.04274117883976329), time, (train_step_time: 1.36733s, train_average_time: 0.71291s);(grad_norm_mean: nan, grad_norm_step: 0.18632) +2023-10-26 13:56:59 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5780, lr:1.099109e-08, step_mean_loss:0.003722677007317543, average_loss:0.04261428136762371), time, (train_step_time: 1.56387s, train_average_time: 0.71526s);(grad_norm_mean: nan, grad_norm_step: 0.51957) +2023-10-26 13:57:25 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5800, lr:6.182981e-09, step_mean_loss:0.00047677280963398516, average_loss:0.04248571757731889), time, (train_step_time: 1.48388s, train_average_time: 0.71734s);(grad_norm_mean: nan, grad_norm_step: 0.07689) +2023-10-26 13:57:53 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5820, lr:2.748149e-09, step_mean_loss:0.0006451747030951083, average_loss:0.04234930451649137), time, (train_step_time: 1.47233s, train_average_time: 0.71967s);(grad_norm_mean: nan, grad_norm_step: 0.11491) +2023-10-26 13:58:20 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5840, lr:6.870608e-10, step_mean_loss:0.05702957138419151, average_loss:0.04221933759172472), time, (train_step_time: 1.32472s, train_average_time: 0.72180s);(grad_norm_mean: nan, grad_norm_step: 3.84948) +2023-10-26 13:58:46 INFO root donut_experiment.py:239 - experiment:nougat-base; train, (epoch: 19, steps: 5860, lr:0.000000e+00, step_mean_loss:9.020544894156046e-06, average_loss:0.04208399613928352), time, (train_step_time: 1.44753s, train_average_time: 0.72384s);(grad_norm_mean: nan, grad_norm_step: 0.00175) +2023-10-26 13:58:47 INFO root donut_experiment.py:260 - experiment:nougat-base; eval, (epoch: 19, steps: 5860); +2023-10-26 13:58:51 INFO root donut_experiment.py:62 - batch inference time:3.6159110069274902 s +2023-10-26 13:58:54 INFO root donut_experiment.py:62 - batch inference time:2.7375919818878174 s +2023-10-26 13:58:55 INFO root donut_experiment.py:62 - batch inference time:1.6833722591400146 s +2023-10-26 13:58:57 INFO root donut_experiment.py:62 - batch inference time:1.1622211933135986 s +2023-10-26 13:58:59 INFO root donut_experiment.py:62 - batch inference time:2.21260929107666 s +2023-10-26 13:59:00 INFO root donut_experiment.py:62 - batch inference time:1.313952922821045 s +2023-10-26 13:59:01 INFO root donut_experiment.py:62 - batch inference time:0.9949862957000732 s +2023-10-26 13:59:03 INFO root donut_experiment.py:62 - batch inference time:1.6114015579223633 s +2023-10-26 13:59:05 INFO root donut_experiment.py:62 - batch inference time:1.632873296737671 s +2023-10-26 13:59:07 INFO root donut_experiment.py:62 - batch inference time:2.345200300216675 s +2023-10-26 13:59:09 INFO root donut_experiment.py:62 - batch inference time:2.0926401615142822 s +2023-10-26 13:59:10 INFO root donut_experiment.py:62 - batch inference time:0.9385757446289062 s +2023-10-26 13:59:12 INFO root donut_experiment.py:62 - batch inference time:2.2284109592437744 s +2023-10-26 13:59:14 INFO root donut_experiment.py:62 - batch inference time:1.8521666526794434 s +2023-10-26 13:59:16 INFO root donut_experiment.py:62 - batch inference time:2.272869348526001 s +2023-10-26 13:59:18 INFO root donut_experiment.py:62 - batch inference time:1.6828498840332031 s +2023-10-26 13:59:20 INFO root donut_experiment.py:62 - batch inference time:2.0190799236297607 s +2023-10-26 13:59:22 INFO root donut_experiment.py:62 - batch inference time:1.8643078804016113 s +2023-10-26 13:59:22 INFO root donut_experiment.py:71 - evaluating... +2023-10-26 13:59:22 INFO root donut_experiment.py:72 - token_acc: 0.3212045169385194; edit_dis: 0.10040693576955698 +2023-10-26 13:59:28 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch19_step5860_lr0.000000e+00_avg_loss0.04208_token_acc0.32120_edit_dis0.10041.pth +2023-10-26 14:33:34 INFO root base_experiment.py:181 - device:cuda:0, is_master:True, device_ids:[0], is_distributed:False +2023-10-26 14:33:49 INFO root donut_experiment.py:149 - init weight from pretrained model:facebook/nougat-base +2023-10-26 14:33:53 INFO root donut_experiment.py:156 - Number of parameter: 348.69M +2023-10-26 14:33:53 INFO root donut_experiment.py:230 - use data loader with batch_size:2,num_workers:10 +2023-10-26 14:33:53 INFO root donut_experiment.py:183 - success init train data loader len:293 +2023-10-26 14:33:54 INFO root donut_experiment.py:230 - use data loader with batch_size:2,num_workers:10 +2023-10-26 14:33:54 INFO root donut_experiment.py:196 - success init eval data loader len:18 +2023-10-26 14:33:54 INFO root base_experiment.py:293 - success init optimizer and scheduler, optimizer:AdamW ( +Parameter Group 0 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.05 + +Parameter Group 1 + amsgrad: False + betas: (0.9, 0.98) + capturable: False + eps: 1e-06 + foreach: None + initial_lr: 2e-05 + lr: 0.0 + lr_scale: 1.0 + maximize: False + weight_decay: 0.0 +), scheduler:, scheduler_args:{'scheduler_type': 'cosine', 'warmup_epochs': 0, 'warmup_steps': 500}, warmup_steps:500,num_training_steps:5860, gradient_accumulator:1 +2023-10-26 14:33:54 INFO root base_experiment.py:224 - current trainer epochs:20, train_dataset_len:586, data_loader_len:293 +2023-10-26 14:34:09 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 20, lr:8.000000e-07, step_mean_loss:0.32287901639938354, average_loss:0.7305446535348892), time, (train_step_time: 0.56226s, train_average_time: 0.69130s);(grad_norm_mean: nan, grad_norm_step: 4.47059) +2023-10-26 14:34:20 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 40, lr:1.600000e-06, step_mean_loss:0.4938681721687317, average_loss:0.6360167138278484), time, (train_step_time: 0.54911s, train_average_time: 0.63437s);(grad_norm_mean: nan, grad_norm_step: 6.80188) +2023-10-26 14:34:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 60, lr:2.400000e-06, step_mean_loss:0.4527384340763092, average_loss:0.5823022119700909), time, (train_step_time: 0.62603s, train_average_time: 0.61285s);(grad_norm_mean: nan, grad_norm_step: 7.40869) +2023-10-26 14:34:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 80, lr:3.200000e-06, step_mean_loss:0.15652035176753998, average_loss:0.5221803175285459), time, (train_step_time: 0.76473s, train_average_time: 0.61364s);(grad_norm_mean: nan, grad_norm_step: 1.91563) +2023-10-26 14:34:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 100, lr:4.000000e-06, step_mean_loss:0.07431895285844803, average_loss:0.4790472977608442), time, (train_step_time: 0.61464s, train_average_time: 0.64161s);(grad_norm_mean: nan, grad_norm_step: 3.01843) +2023-10-26 14:35:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 120, lr:4.800000e-06, step_mean_loss:0.5323466062545776, average_loss:0.4311722570719818), time, (train_step_time: 0.53231s, train_average_time: 0.63403s);(grad_norm_mean: nan, grad_norm_step: 13.59695) +2023-10-26 14:35:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 140, lr:5.600000e-06, step_mean_loss:0.1518213450908661, average_loss:0.41059066262096167), time, (train_step_time: 0.54277s, train_average_time: 0.62546s);(grad_norm_mean: nan, grad_norm_step: 2.32460) +2023-10-26 14:35:34 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 160, lr:6.400000e-06, step_mean_loss:0.25410377979278564, average_loss:0.39638468322809783), time, (train_step_time: 0.56439s, train_average_time: 0.62012s);(grad_norm_mean: nan, grad_norm_step: 3.95280) +2023-10-26 14:35:46 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 180, lr:7.200000e-06, step_mean_loss:0.12106399238109589, average_loss:0.3728199798406826), time, (train_step_time: 0.62058s, train_average_time: 0.61772s);(grad_norm_mean: nan, grad_norm_step: 2.49192) +2023-10-26 14:35:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 200, lr:8.000000e-06, step_mean_loss:0.03725942596793175, average_loss:0.3540227706544101), time, (train_step_time: 0.59792s, train_average_time: 0.61532s);(grad_norm_mean: nan, grad_norm_step: 1.56196) +2023-10-26 14:36:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 220, lr:8.800000e-06, step_mean_loss:0.07421939074993134, average_loss:0.3376043888824907), time, (train_step_time: 0.56924s, train_average_time: 0.61158s);(grad_norm_mean: nan, grad_norm_step: 2.82832) +2023-10-26 14:36:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 240, lr:9.600000e-06, step_mean_loss:0.08868925273418427, average_loss:0.3234365170976768), time, (train_step_time: 0.60109s, train_average_time: 0.60893s);(grad_norm_mean: nan, grad_norm_step: 2.38034) +2023-10-26 14:36:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 260, lr:1.040000e-05, step_mean_loss:0.030674133449792862, average_loss:0.3106963514135434), time, (train_step_time: 0.57056s, train_average_time: 0.60685s);(grad_norm_mean: nan, grad_norm_step: 1.35599) +2023-10-26 14:36:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 0, steps: 280, lr:1.120000e-05, step_mean_loss:0.17411839962005615, average_loss:0.29889493840746584), time, (train_step_time: 0.53652s, train_average_time: 0.60469s);(grad_norm_mean: nan, grad_norm_step: 2.11127) +2023-10-26 14:36:52 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 0, steps: 293); +2023-10-26 14:36:55 INFO root donut_experiment.py:66 - batch inference time:2.6889195442199707 s +2023-10-26 14:36:57 INFO root donut_experiment.py:66 - batch inference time:2.0718131065368652 s +2023-10-26 14:36:59 INFO root donut_experiment.py:66 - batch inference time:1.5383079051971436 s +2023-10-26 14:37:00 INFO root donut_experiment.py:66 - batch inference time:1.0163159370422363 s +2023-10-26 14:37:02 INFO root donut_experiment.py:66 - batch inference time:2.070486068725586 s +2023-10-26 14:37:03 INFO root donut_experiment.py:66 - batch inference time:1.239321231842041 s +2023-10-26 14:37:04 INFO root donut_experiment.py:66 - batch inference time:0.8928699493408203 s +2023-10-26 14:37:06 INFO root donut_experiment.py:66 - batch inference time:1.4937443733215332 s +2023-10-26 14:37:07 INFO root donut_experiment.py:66 - batch inference time:1.494204044342041 s +2023-10-26 14:37:18 INFO root donut_experiment.py:66 - batch inference time:10.490166187286377 s +2023-10-26 14:37:20 INFO root donut_experiment.py:66 - batch inference time:1.7840697765350342 s +2023-10-26 14:37:21 INFO root donut_experiment.py:66 - batch inference time:0.806410551071167 s +2023-10-26 14:37:23 INFO root donut_experiment.py:66 - batch inference time:2.0227389335632324 s +2023-10-26 14:37:24 INFO root donut_experiment.py:66 - batch inference time:1.714538335800171 s +2023-10-26 14:37:35 INFO root donut_experiment.py:66 - batch inference time:10.700222730636597 s +2023-10-26 14:37:45 INFO root donut_experiment.py:66 - batch inference time:10.281785011291504 s +2023-10-26 14:37:47 INFO root donut_experiment.py:66 - batch inference time:1.7090108394622803 s +2023-10-26 14:37:49 INFO root donut_experiment.py:66 - batch inference time:1.850147008895874 s +2023-10-26 14:37:49 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 14:37:49 INFO root donut_experiment.py:76 - token_acc: 0.22003408445370196; edit_dis: 0.5493236953605117 +2023-10-26 14:37:54 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch0_step293_lr1.172000e-05_avg_loss0.29605_token_acc0.22003_edit_dis0.54932.pth +2023-10-26 14:37:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 300, lr:1.200000e-05, step_mean_loss:0.14077003300189972, average_loss:0.2927570499479771), time, (train_step_time: 0.54426s, train_average_time: 0.60219s);(grad_norm_mean: nan, grad_norm_step: 2.87210) +2023-10-26 14:38:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 320, lr:1.280000e-05, step_mean_loss:0.03654462844133377, average_loss:0.28253773113829084), time, (train_step_time: 0.57138s, train_average_time: 0.60288s);(grad_norm_mean: nan, grad_norm_step: 1.09785) +2023-10-26 14:38:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 340, lr:1.360000e-05, step_mean_loss:0.3327745497226715, average_loss:0.27365192901343105), time, (train_step_time: 0.64145s, train_average_time: 0.60169s);(grad_norm_mean: nan, grad_norm_step: 22.71841) +2023-10-26 14:38:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 360, lr:1.440000e-05, step_mean_loss:0.09481219202280045, average_loss:0.26443603556706674), time, (train_step_time: 0.55382s, train_average_time: 0.60091s);(grad_norm_mean: nan, grad_norm_step: 5.97764) +2023-10-26 14:38:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 380, lr:1.520000e-05, step_mean_loss:0.24224494397640228, average_loss:0.2567061645199398), time, (train_step_time: 0.54829s, train_average_time: 0.59983s);(grad_norm_mean: nan, grad_norm_step: 4.76520) +2023-10-26 14:38:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 400, lr:1.600000e-05, step_mean_loss:0.14054645597934723, average_loss:0.2498314617795404), time, (train_step_time: 0.54839s, train_average_time: 0.59846s);(grad_norm_mean: nan, grad_norm_step: 3.35154) +2023-10-26 14:39:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 420, lr:1.680000e-05, step_mean_loss:0.11570808291435242, average_loss:0.24517056335628565), time, (train_step_time: 0.63566s, train_average_time: 0.59735s);(grad_norm_mean: nan, grad_norm_step: 8.25714) +2023-10-26 14:39:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 440, lr:1.760000e-05, step_mean_loss:0.0681033581495285, average_loss:0.23968479251383212), time, (train_step_time: 0.58976s, train_average_time: 0.59579s);(grad_norm_mean: nan, grad_norm_step: 2.89834) +2023-10-26 14:39:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 460, lr:1.840000e-05, step_mean_loss:0.12771596014499664, average_loss:0.235526438493727), time, (train_step_time: 0.55129s, train_average_time: 0.59508s);(grad_norm_mean: nan, grad_norm_step: 2.19905) +2023-10-26 14:39:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 480, lr:1.920000e-05, step_mean_loss:0.08283228427171707, average_loss:0.23411345709561526), time, (train_step_time: 0.56636s, train_average_time: 0.59422s);(grad_norm_mean: nan, grad_norm_step: 2.36676) +2023-10-26 14:39:55 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 500, lr:2.000000e-05, step_mean_loss:0.36745020747184753, average_loss:0.2310562332002446), time, (train_step_time: 0.53272s, train_average_time: 0.59340s);(grad_norm_mean: nan, grad_norm_step: 5.46637) +2023-10-26 14:40:07 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 520, lr:1.999931e-05, step_mean_loss:0.2682710886001587, average_loss:0.2269441632831541), time, (train_step_time: 0.58336s, train_average_time: 0.59341s);(grad_norm_mean: nan, grad_norm_step: 5.66462) +2023-10-26 14:40:20 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 540, lr:1.999725e-05, step_mean_loss:0.10865110158920288, average_loss:0.2224767069875573), time, (train_step_time: 0.82033s, train_average_time: 0.59447s);(grad_norm_mean: nan, grad_norm_step: 3.28872) +2023-10-26 14:40:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 560, lr:1.999382e-05, step_mean_loss:0.09415590018033981, average_loss:0.21889413824620924), time, (train_step_time: 0.76867s, train_average_time: 0.59599s);(grad_norm_mean: nan, grad_norm_step: 4.30516) +2023-10-26 14:40:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 1, steps: 580, lr:1.998901e-05, step_mean_loss:0.19063536822795868, average_loss:0.21603059930132767), time, (train_step_time: 0.55406s, train_average_time: 0.59551s);(grad_norm_mean: nan, grad_norm_step: 4.66258) +2023-10-26 14:40:48 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 1, steps: 586); +2023-10-26 14:40:52 INFO root donut_experiment.py:66 - batch inference time:2.9779446125030518 s +2023-10-26 14:40:54 INFO root donut_experiment.py:66 - batch inference time:2.396714687347412 s +2023-10-26 14:40:56 INFO root donut_experiment.py:66 - batch inference time:1.4968557357788086 s +2023-10-26 14:40:57 INFO root donut_experiment.py:66 - batch inference time:0.9801023006439209 s +2023-10-26 14:40:59 INFO root donut_experiment.py:66 - batch inference time:1.8941030502319336 s +2023-10-26 14:41:00 INFO root donut_experiment.py:66 - batch inference time:1.089426040649414 s +2023-10-26 14:41:01 INFO root donut_experiment.py:66 - batch inference time:0.830420732498169 s +2023-10-26 14:41:02 INFO root donut_experiment.py:66 - batch inference time:1.3120064735412598 s +2023-10-26 14:41:04 INFO root donut_experiment.py:66 - batch inference time:1.6279544830322266 s +2023-10-26 14:41:06 INFO root donut_experiment.py:66 - batch inference time:2.056117534637451 s +2023-10-26 14:41:17 INFO root donut_experiment.py:66 - batch inference time:10.919866800308228 s +2023-10-26 14:41:17 INFO root donut_experiment.py:66 - batch inference time:0.8321735858917236 s +2023-10-26 14:41:20 INFO root donut_experiment.py:66 - batch inference time:2.106509208679199 s +2023-10-26 14:41:21 INFO root donut_experiment.py:66 - batch inference time:1.5411486625671387 s +2023-10-26 14:41:31 INFO root donut_experiment.py:66 - batch inference time:10.306913614273071 s +2023-10-26 14:41:33 INFO root donut_experiment.py:66 - batch inference time:1.4789459705352783 s +2023-10-26 14:41:35 INFO root donut_experiment.py:66 - batch inference time:1.712752103805542 s +2023-10-26 14:41:36 INFO root donut_experiment.py:66 - batch inference time:1.5482523441314697 s +2023-10-26 14:41:37 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 14:41:37 INFO root donut_experiment.py:76 - token_acc: 0.26583685263391865; edit_dis: 0.2991526643198269 +2023-10-26 14:41:42 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch1_step586_lr1.998730e-05_avg_loss0.21518_token_acc0.26584_edit_dis0.29915.pth +2023-10-26 14:41:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 600, lr:1.998283e-05, step_mean_loss:0.09429334104061127, average_loss:0.21229814617196097), time, (train_step_time: 0.64025s, train_average_time: 0.59502s);(grad_norm_mean: nan, grad_norm_step: 3.84758) +2023-10-26 14:42:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 620, lr:1.997528e-05, step_mean_loss:0.07445822656154633, average_loss:0.20887055192818685), time, (train_step_time: 0.54185s, train_average_time: 0.59424s);(grad_norm_mean: nan, grad_norm_step: 2.99625) +2023-10-26 14:42:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 640, lr:1.996635e-05, step_mean_loss:0.09942490607500076, average_loss:0.20523451380358892), time, (train_step_time: 0.64041s, train_average_time: 0.59397s);(grad_norm_mean: nan, grad_norm_step: 2.58256) +2023-10-26 14:42:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 660, lr:1.995606e-05, step_mean_loss:0.04842619225382805, average_loss:0.20248046752248863), time, (train_step_time: 0.55549s, train_average_time: 0.59356s);(grad_norm_mean: nan, grad_norm_step: 1.29758) +2023-10-26 14:42:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 680, lr:1.994440e-05, step_mean_loss:0.23108252882957458, average_loss:0.19999685220160138), time, (train_step_time: 0.56951s, train_average_time: 0.59307s);(grad_norm_mean: nan, grad_norm_step: 3.13531) +2023-10-26 14:42:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 700, lr:1.993137e-05, step_mean_loss:0.1210678368806839, average_loss:0.19600731816741504), time, (train_step_time: 0.56587s, train_average_time: 0.59256s);(grad_norm_mean: nan, grad_norm_step: 3.71444) +2023-10-26 14:43:00 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 720, lr:1.991698e-05, step_mean_loss:0.08746690303087234, average_loss:0.19264222032765652), time, (train_step_time: 0.57376s, train_average_time: 0.59179s);(grad_norm_mean: nan, grad_norm_step: 2.79547) +2023-10-26 14:43:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 740, lr:1.990123e-05, step_mean_loss:0.06967191398143768, average_loss:0.18956194289116743), time, (train_step_time: 0.62968s, train_average_time: 0.59171s);(grad_norm_mean: nan, grad_norm_step: 3.35120) +2023-10-26 14:43:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 760, lr:1.988411e-05, step_mean_loss:0.008655752055346966, average_loss:0.18709167693067635), time, (train_step_time: 0.53452s, train_average_time: 0.59140s);(grad_norm_mean: nan, grad_norm_step: 0.50130) +2023-10-26 14:43:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 780, lr:1.986564e-05, step_mean_loss:0.14645443856716156, average_loss:0.18439459878270728), time, (train_step_time: 0.68648s, train_average_time: 0.59099s);(grad_norm_mean: nan, grad_norm_step: 2.06006) +2023-10-26 14:43:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 800, lr:1.984581e-05, step_mean_loss:0.031054282560944557, average_loss:0.18181629069556948), time, (train_step_time: 0.55155s, train_average_time: 0.59075s);(grad_norm_mean: nan, grad_norm_step: 0.99603) +2023-10-26 14:43:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 820, lr:1.982463e-05, step_mean_loss:0.03613365814089775, average_loss:0.18031107744417812), time, (train_step_time: 0.57965s, train_average_time: 0.59053s);(grad_norm_mean: nan, grad_norm_step: 1.70683) +2023-10-26 14:44:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 840, lr:1.980209e-05, step_mean_loss:0.052901867777109146, average_loss:0.17753068228041577), time, (train_step_time: 0.57630s, train_average_time: 0.59025s);(grad_norm_mean: nan, grad_norm_step: 1.27137) +2023-10-26 14:44:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 2, steps: 860, lr:1.977821e-05, step_mean_loss:0.09550926834344864, average_loss:0.1756646048234299), time, (train_step_time: 0.55495s, train_average_time: 0.58974s);(grad_norm_mean: nan, grad_norm_step: 2.36861) +2023-10-26 14:44:33 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 2, steps: 879); +2023-10-26 14:44:37 INFO root donut_experiment.py:66 - batch inference time:2.9130146503448486 s +2023-10-26 14:44:39 INFO root donut_experiment.py:66 - batch inference time:2.355313777923584 s +2023-10-26 14:44:40 INFO root donut_experiment.py:66 - batch inference time:1.4820213317871094 s +2023-10-26 14:44:42 INFO root donut_experiment.py:66 - batch inference time:1.0204105377197266 s +2023-10-26 14:44:43 INFO root donut_experiment.py:66 - batch inference time:1.8843650817871094 s +2023-10-26 14:44:45 INFO root donut_experiment.py:66 - batch inference time:1.328056812286377 s +2023-10-26 14:44:46 INFO root donut_experiment.py:66 - batch inference time:1.0316002368927002 s +2023-10-26 14:44:47 INFO root donut_experiment.py:66 - batch inference time:1.5554122924804688 s +2023-10-26 14:44:49 INFO root donut_experiment.py:66 - batch inference time:1.5870625972747803 s +2023-10-26 14:44:51 INFO root donut_experiment.py:66 - batch inference time:2.0280961990356445 s +2023-10-26 14:45:02 INFO root donut_experiment.py:66 - batch inference time:10.5877046585083 s +2023-10-26 14:45:02 INFO root donut_experiment.py:66 - batch inference time:0.7179694175720215 s +2023-10-26 14:45:04 INFO root donut_experiment.py:66 - batch inference time:1.822394609451294 s +2023-10-26 14:45:06 INFO root donut_experiment.py:66 - batch inference time:1.5095958709716797 s +2023-10-26 14:45:16 INFO root donut_experiment.py:66 - batch inference time:10.238460063934326 s +2023-10-26 14:45:26 INFO root donut_experiment.py:66 - batch inference time:10.189006567001343 s +2023-10-26 14:45:28 INFO root donut_experiment.py:66 - batch inference time:1.701195478439331 s +2023-10-26 14:45:30 INFO root donut_experiment.py:66 - batch inference time:1.6973676681518555 s +2023-10-26 14:45:30 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 14:45:30 INFO root donut_experiment.py:76 - token_acc: 0.21848421848421848; edit_dis: 0.5781351665885567 +2023-10-26 14:45:36 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch2_step879_lr1.975428e-05_avg_loss0.17350_token_acc0.21848_edit_dis0.57814.pth +2023-10-26 14:45:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 880, lr:1.975299e-05, step_mean_loss:0.01768566109240055, average_loss:0.1733230028883554), time, (train_step_time: 0.71693s, train_average_time: 0.58932s);(grad_norm_mean: nan, grad_norm_step: 1.03557) +2023-10-26 14:45:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 900, lr:1.972643e-05, step_mean_loss:0.5669965744018555, average_loss:0.17127894475383476), time, (train_step_time: 0.83975s, train_average_time: 0.60047s);(grad_norm_mean: nan, grad_norm_step: 4.14297) +2023-10-26 14:46:27 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 920, lr:1.969853e-05, step_mean_loss:0.0607680045068264, average_loss:0.16957947279114033), time, (train_step_time: 1.54439s, train_average_time: 0.61707s);(grad_norm_mean: nan, grad_norm_step: 3.51395) +2023-10-26 14:46:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 940, lr:1.966930e-05, step_mean_loss:0.008436296135187149, average_loss:0.1674231039731704), time, (train_step_time: 1.47153s, train_average_time: 0.63214s);(grad_norm_mean: nan, grad_norm_step: 0.35042) +2023-10-26 14:47:20 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 960, lr:1.963874e-05, step_mean_loss:0.09912149608135223, average_loss:0.1649097834257797), time, (train_step_time: 1.35680s, train_average_time: 0.64749s);(grad_norm_mean: nan, grad_norm_step: 3.28797) +2023-10-26 14:47:48 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 980, lr:1.960685e-05, step_mean_loss:0.015421700663864613, average_loss:0.16258355369458774), time, (train_step_time: 1.62637s, train_average_time: 0.66268s);(grad_norm_mean: nan, grad_norm_step: 1.68474) +2023-10-26 14:48:16 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1000, lr:1.957365e-05, step_mean_loss:0.11040584743022919, average_loss:0.1611647469175514), time, (train_step_time: 1.65918s, train_average_time: 0.67756s);(grad_norm_mean: nan, grad_norm_step: 3.20565) +2023-10-26 14:48:16 INFO root donut_experiment.py:251 - experiment:nougat-base; eval, (epoch: 3, steps: 1000); +2023-10-26 14:48:21 INFO root donut_experiment.py:66 - batch inference time:3.6142780780792236 s +2023-10-26 14:48:24 INFO root donut_experiment.py:66 - batch inference time:2.845097780227661 s +2023-10-26 14:48:26 INFO root donut_experiment.py:66 - batch inference time:1.7464666366577148 s +2023-10-26 14:48:27 INFO root donut_experiment.py:66 - batch inference time:1.172882318496704 s +2023-10-26 14:48:29 INFO root donut_experiment.py:66 - batch inference time:2.1499903202056885 s +2023-10-26 14:48:30 INFO root donut_experiment.py:66 - batch inference time:1.1905937194824219 s +2023-10-26 14:48:31 INFO root donut_experiment.py:66 - batch inference time:0.9985530376434326 s +2023-10-26 14:48:33 INFO root donut_experiment.py:66 - batch inference time:1.5497932434082031 s +2023-10-26 14:48:34 INFO root donut_experiment.py:66 - batch inference time:1.6461918354034424 s +2023-10-26 14:48:37 INFO root donut_experiment.py:66 - batch inference time:2.338271141052246 s +2023-10-26 14:48:39 INFO root donut_experiment.py:66 - batch inference time:2.3517203330993652 s +2023-10-26 14:48:40 INFO root donut_experiment.py:66 - batch inference time:0.8603196144104004 s +2023-10-26 14:48:42 INFO root donut_experiment.py:66 - batch inference time:2.128169298171997 s +2023-10-26 14:48:44 INFO root donut_experiment.py:66 - batch inference time:1.6509039402008057 s +2023-10-26 14:48:56 INFO root donut_experiment.py:66 - batch inference time:11.863871812820435 s +2023-10-26 14:48:57 INFO root donut_experiment.py:66 - batch inference time:1.4394407272338867 s +2023-10-26 14:48:59 INFO root donut_experiment.py:66 - batch inference time:1.6865694522857666 s +2023-10-26 14:49:11 INFO root donut_experiment.py:66 - batch inference time:12.079824686050415 s +2023-10-26 14:49:11 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 14:49:11 INFO root donut_experiment.py:76 - token_acc: 0.25863596102745795; edit_dis: 0.4210182578601975 +2023-10-26 14:49:17 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch3_step1000_lr1.957365e-05_avg_loss0.16116_token_acc0.25864_edit_dis0.42102.pth +2023-10-26 14:49:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1020, lr:1.953913e-05, step_mean_loss:0.012317833490669727, average_loss:0.1589404108461595), time, (train_step_time: 1.36585s, train_average_time: 0.69087s);(grad_norm_mean: nan, grad_norm_step: 1.32061) +2023-10-26 14:50:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1040, lr:1.950329e-05, step_mean_loss:0.11118046939373016, average_loss:0.15740902271018758), time, (train_step_time: 1.41471s, train_average_time: 0.70360s);(grad_norm_mean: nan, grad_norm_step: 2.64110) +2023-10-26 14:50:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1060, lr:1.946616e-05, step_mean_loss:0.002752594416961074, average_loss:0.15573337241075932), time, (train_step_time: 1.11121s, train_average_time: 0.71571s);(grad_norm_mean: nan, grad_norm_step: 0.19268) +2023-10-26 14:51:05 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1080, lr:1.942772e-05, step_mean_loss:0.021473655477166176, average_loss:0.15375399867975997), time, (train_step_time: 1.51865s, train_average_time: 0.72747s);(grad_norm_mean: nan, grad_norm_step: 1.31964) +2023-10-26 14:51:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1100, lr:1.938798e-05, step_mean_loss:0.04170094057917595, average_loss:0.1520074430881703), time, (train_step_time: 1.47922s, train_average_time: 0.73922s);(grad_norm_mean: nan, grad_norm_step: 1.43889) +2023-10-26 14:51:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1120, lr:1.934696e-05, step_mean_loss:0.03223899006843567, average_loss:0.15067306330175986), time, (train_step_time: 1.45121s, train_average_time: 0.74948s);(grad_norm_mean: nan, grad_norm_step: 1.31450) +2023-10-26 14:52:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1140, lr:1.930465e-05, step_mean_loss:0.05405557155609131, average_loss:0.14922936626724295), time, (train_step_time: 1.51176s, train_average_time: 0.76052s);(grad_norm_mean: nan, grad_norm_step: 2.49656) +2023-10-26 14:52:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 3, steps: 1160, lr:1.926107e-05, step_mean_loss:0.1044929102063179, average_loss:0.14800804893773226), time, (train_step_time: 1.45876s, train_average_time: 0.77026s);(grad_norm_mean: nan, grad_norm_step: 2.95312) +2023-10-26 14:53:09 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 3, steps: 1172); +2023-10-26 14:53:13 INFO root donut_experiment.py:66 - batch inference time:3.4982049465179443 s +2023-10-26 14:53:16 INFO root donut_experiment.py:66 - batch inference time:2.7735366821289062 s +2023-10-26 14:53:18 INFO root donut_experiment.py:66 - batch inference time:1.6996378898620605 s +2023-10-26 14:53:19 INFO root donut_experiment.py:66 - batch inference time:1.1801910400390625 s +2023-10-26 14:53:21 INFO root donut_experiment.py:66 - batch inference time:2.228671073913574 s +2023-10-26 14:53:23 INFO root donut_experiment.py:66 - batch inference time:1.3824198246002197 s +2023-10-26 14:53:24 INFO root donut_experiment.py:66 - batch inference time:1.021801471710205 s +2023-10-26 14:53:25 INFO root donut_experiment.py:66 - batch inference time:1.5622761249542236 s +2023-10-26 14:53:27 INFO root donut_experiment.py:66 - batch inference time:1.6381158828735352 s +2023-10-26 14:53:29 INFO root donut_experiment.py:66 - batch inference time:2.2615156173706055 s +2023-10-26 14:53:32 INFO root donut_experiment.py:66 - batch inference time:2.424365520477295 s +2023-10-26 14:53:33 INFO root donut_experiment.py:66 - batch inference time:0.8583006858825684 s +2023-10-26 14:53:35 INFO root donut_experiment.py:66 - batch inference time:2.6869099140167236 s +2023-10-26 14:53:37 INFO root donut_experiment.py:66 - batch inference time:1.7341442108154297 s +2023-10-26 14:53:40 INFO root donut_experiment.py:66 - batch inference time:2.2396762371063232 s +2023-10-26 14:53:41 INFO root donut_experiment.py:66 - batch inference time:1.5915570259094238 s +2023-10-26 14:53:43 INFO root donut_experiment.py:66 - batch inference time:2.0230300426483154 s +2023-10-26 14:53:45 INFO root donut_experiment.py:66 - batch inference time:1.9102435111999512 s +2023-10-26 14:53:45 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 14:53:45 INFO root donut_experiment.py:76 - token_acc: 0.387116754413131; edit_dis: 0.145337681897352 +2023-10-26 14:53:51 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch3_step1172_lr1.923430e-05_avg_loss0.14695_token_acc0.38712_edit_dis0.14534.pth +2023-10-26 14:54:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1180, lr:1.921621e-05, step_mean_loss:0.05831986665725708, average_loss:0.1462140052412613), time, (train_step_time: 1.00289s, train_average_time: 0.78063s);(grad_norm_mean: nan, grad_norm_step: 6.09673) +2023-10-26 14:54:29 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1200, lr:1.917008e-05, step_mean_loss:0.011302677914500237, average_loss:0.14467897215266323), time, (train_step_time: 1.49298s, train_average_time: 0.78932s);(grad_norm_mean: nan, grad_norm_step: 0.69212) +2023-10-26 14:54:56 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1220, lr:1.912270e-05, step_mean_loss:0.03413653001189232, average_loss:0.14310457402655138), time, (train_step_time: 1.46102s, train_average_time: 0.79877s);(grad_norm_mean: nan, grad_norm_step: 1.72086) +2023-10-26 14:55:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1240, lr:1.907406e-05, step_mean_loss:0.10565021634101868, average_loss:0.14156838427663915), time, (train_step_time: 1.51867s, train_average_time: 0.80762s);(grad_norm_mean: nan, grad_norm_step: 7.43851) +2023-10-26 14:55:50 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1260, lr:1.902417e-05, step_mean_loss:0.009332851506769657, average_loss:0.1400657438273017), time, (train_step_time: 0.95570s, train_average_time: 0.81597s);(grad_norm_mean: nan, grad_norm_step: 0.74476) +2023-10-26 14:56:17 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1280, lr:1.897304e-05, step_mean_loss:0.0904456153512001, average_loss:0.13859184555685716), time, (train_step_time: 1.44519s, train_average_time: 0.82430s);(grad_norm_mean: nan, grad_norm_step: 7.08280) +2023-10-26 14:56:43 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1300, lr:1.892068e-05, step_mean_loss:0.15011626482009888, average_loss:0.1371541393726455), time, (train_step_time: 1.10646s, train_average_time: 0.83199s);(grad_norm_mean: nan, grad_norm_step: 5.31962) +2023-10-26 14:57:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1320, lr:1.886710e-05, step_mean_loss:0.010552891530096531, average_loss:0.13607068263183023), time, (train_step_time: 1.48332s, train_average_time: 0.84032s);(grad_norm_mean: nan, grad_norm_step: 0.92289) +2023-10-26 14:57:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1340, lr:1.881230e-05, step_mean_loss:0.018533725291490555, average_loss:0.13563431130518872), time, (train_step_time: 0.74361s, train_average_time: 0.84721s);(grad_norm_mean: nan, grad_norm_step: 0.67063) +2023-10-26 14:58:04 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1360, lr:1.875628e-05, step_mean_loss:0.00952776800841093, average_loss:0.1341718652004013), time, (train_step_time: 1.53647s, train_average_time: 0.85442s);(grad_norm_mean: nan, grad_norm_step: 0.58536) +2023-10-26 14:58:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1380, lr:1.869906e-05, step_mean_loss:0.009886021725833416, average_loss:0.13273089530867646), time, (train_step_time: 1.51531s, train_average_time: 0.86249s);(grad_norm_mean: nan, grad_norm_step: 0.54459) +2023-10-26 14:58:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1400, lr:1.864065e-05, step_mean_loss:0.028380298987030983, average_loss:0.1313539844310643), time, (train_step_time: 1.42081s, train_average_time: 0.86938s);(grad_norm_mean: nan, grad_norm_step: 2.22272) +2023-10-26 14:59:27 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1420, lr:1.858105e-05, step_mean_loss:0.1180654764175415, average_loss:0.13014719609759787), time, (train_step_time: 1.48765s, train_average_time: 0.87643s);(grad_norm_mean: nan, grad_norm_step: 2.38897) +2023-10-26 14:59:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1440, lr:1.852027e-05, step_mean_loss:0.018837206065654755, average_loss:0.1290257255241967), time, (train_step_time: 1.29256s, train_average_time: 0.88266s);(grad_norm_mean: nan, grad_norm_step: 0.63003) +2023-10-26 15:00:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 4, steps: 1460, lr:1.845832e-05, step_mean_loss:0.05477258190512657, average_loss:0.12843053185003045), time, (train_step_time: 1.44879s, train_average_time: 0.88976s);(grad_norm_mean: nan, grad_norm_step: 5.42420) +2023-10-26 15:00:28 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 4, steps: 1465); +2023-10-26 15:00:41 INFO root donut_experiment.py:66 - batch inference time:12.087937831878662 s +2023-10-26 15:00:44 INFO root donut_experiment.py:66 - batch inference time:2.7067325115203857 s +2023-10-26 15:00:45 INFO root donut_experiment.py:66 - batch inference time:1.6807632446289062 s +2023-10-26 15:00:47 INFO root donut_experiment.py:66 - batch inference time:1.3601906299591064 s +2023-10-26 15:00:49 INFO root donut_experiment.py:66 - batch inference time:2.289537191390991 s +2023-10-26 15:00:50 INFO root donut_experiment.py:66 - batch inference time:1.3039422035217285 s +2023-10-26 15:00:51 INFO root donut_experiment.py:66 - batch inference time:1.0177631378173828 s +2023-10-26 15:00:53 INFO root donut_experiment.py:66 - batch inference time:1.5090396404266357 s +2023-10-26 15:00:55 INFO root donut_experiment.py:66 - batch inference time:1.6251521110534668 s +2023-10-26 15:00:57 INFO root donut_experiment.py:66 - batch inference time:2.3000965118408203 s +2023-10-26 15:00:59 INFO root donut_experiment.py:66 - batch inference time:2.028348445892334 s +2023-10-26 15:01:00 INFO root donut_experiment.py:66 - batch inference time:0.8769547939300537 s +2023-10-26 15:01:03 INFO root donut_experiment.py:66 - batch inference time:3.5214483737945557 s +2023-10-26 15:01:05 INFO root donut_experiment.py:66 - batch inference time:1.8790628910064697 s +2023-10-26 15:01:08 INFO root donut_experiment.py:66 - batch inference time:2.477200746536255 s +2023-10-26 15:01:10 INFO root donut_experiment.py:66 - batch inference time:1.6812753677368164 s +2023-10-26 15:01:12 INFO root donut_experiment.py:66 - batch inference time:2.0384695529937744 s +2023-10-26 15:01:14 INFO root donut_experiment.py:66 - batch inference time:1.8452661037445068 s +2023-10-26 15:01:14 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:01:14 INFO root donut_experiment.py:76 - token_acc: 0.31961448293826517; edit_dis: 0.19151116360404974 +2023-10-26 15:01:19 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch4_step1465_lr1.844265e-05_avg_loss0.12810_token_acc0.31961_edit_dis0.19151.pth +2023-10-26 15:01:41 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1480, lr:1.839521e-05, step_mean_loss:0.02895713783800602, average_loss:0.1271156394438082), time, (train_step_time: 1.49820s, train_average_time: 0.89623s);(grad_norm_mean: nan, grad_norm_step: 0.97082) +2023-10-26 15:02:07 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1500, lr:1.833094e-05, step_mean_loss:0.010748707689344883, average_loss:0.12592166414957803), time, (train_step_time: 1.48704s, train_average_time: 0.90205s);(grad_norm_mean: nan, grad_norm_step: 0.86688) +2023-10-26 15:02:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1520, lr:1.826553e-05, step_mean_loss:0.027639156207442284, average_loss:0.12470231347044893), time, (train_step_time: 1.48869s, train_average_time: 0.90686s);(grad_norm_mean: nan, grad_norm_step: 1.06291) +2023-10-26 15:03:02 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1540, lr:1.819898e-05, step_mean_loss:0.014100916683673859, average_loss:0.12348504123125262), time, (train_step_time: 1.46764s, train_average_time: 0.91377s);(grad_norm_mean: nan, grad_norm_step: 1.10979) +2023-10-26 15:03:29 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1560, lr:1.813131e-05, step_mean_loss:0.13579890131950378, average_loss:0.12242552399036821), time, (train_step_time: 1.14208s, train_average_time: 0.91935s);(grad_norm_mean: nan, grad_norm_step: 3.48248) +2023-10-26 15:03:56 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1580, lr:1.806252e-05, step_mean_loss:0.04714605584740639, average_loss:0.12119672524726573), time, (train_step_time: 1.07072s, train_average_time: 0.92516s);(grad_norm_mean: nan, grad_norm_step: 2.23585) +2023-10-26 15:04:25 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1600, lr:1.799262e-05, step_mean_loss:0.005334994290024042, average_loss:0.12016819550473884), time, (train_step_time: 1.41128s, train_average_time: 0.93134s);(grad_norm_mean: nan, grad_norm_step: 0.24529) +2023-10-26 15:04:52 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1620, lr:1.792162e-05, step_mean_loss:0.0164658110588789, average_loss:0.11918923676547158), time, (train_step_time: 1.16623s, train_average_time: 0.93660s);(grad_norm_mean: nan, grad_norm_step: 0.96156) +2023-10-26 15:05:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1640, lr:1.784954e-05, step_mean_loss:0.02323094941675663, average_loss:0.1182209932160842), time, (train_step_time: 1.19299s, train_average_time: 0.94200s);(grad_norm_mean: nan, grad_norm_step: 2.04481) +2023-10-26 15:05:45 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1660, lr:1.777638e-05, step_mean_loss:0.023330843076109886, average_loss:0.11771908923794223), time, (train_step_time: 1.26002s, train_average_time: 0.94629s);(grad_norm_mean: nan, grad_norm_step: 2.44010) +2023-10-26 15:06:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1680, lr:1.770214e-05, step_mean_loss:0.01177553553134203, average_loss:0.11688254955148468), time, (train_step_time: 1.50566s, train_average_time: 0.95088s);(grad_norm_mean: nan, grad_norm_step: 0.67473) +2023-10-26 15:06:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1700, lr:1.762685e-05, step_mean_loss:0.012653469108045101, average_loss:0.11583539411398198), time, (train_step_time: 1.49736s, train_average_time: 0.95518s);(grad_norm_mean: nan, grad_norm_step: 1.59189) +2023-10-26 15:07:06 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1720, lr:1.755051e-05, step_mean_loss:0.036925461143255234, average_loss:0.1150214190026312), time, (train_step_time: 1.50232s, train_average_time: 0.95993s);(grad_norm_mean: nan, grad_norm_step: 0.94237) +2023-10-26 15:07:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 5, steps: 1740, lr:1.747314e-05, step_mean_loss:0.09499024599790573, average_loss:0.11412764521899109), time, (train_step_time: 1.46430s, train_average_time: 0.96459s);(grad_norm_mean: nan, grad_norm_step: 3.07479) +2023-10-26 15:07:57 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 5, steps: 1758); +2023-10-26 15:08:02 INFO root donut_experiment.py:66 - batch inference time:3.6812052726745605 s +2023-10-26 15:08:05 INFO root donut_experiment.py:66 - batch inference time:2.756683826446533 s +2023-10-26 15:08:06 INFO root donut_experiment.py:66 - batch inference time:1.6404712200164795 s +2023-10-26 15:08:08 INFO root donut_experiment.py:66 - batch inference time:1.2154953479766846 s +2023-10-26 15:08:10 INFO root donut_experiment.py:66 - batch inference time:2.2099931240081787 s +2023-10-26 15:08:11 INFO root donut_experiment.py:66 - batch inference time:1.3890509605407715 s +2023-10-26 15:08:12 INFO root donut_experiment.py:66 - batch inference time:1.0307559967041016 s +2023-10-26 15:08:14 INFO root donut_experiment.py:66 - batch inference time:1.5838019847869873 s +2023-10-26 15:08:16 INFO root donut_experiment.py:66 - batch inference time:1.6482596397399902 s +2023-10-26 15:08:18 INFO root donut_experiment.py:66 - batch inference time:2.2972841262817383 s +2023-10-26 15:08:20 INFO root donut_experiment.py:66 - batch inference time:2.0058531761169434 s +2023-10-26 15:08:21 INFO root donut_experiment.py:66 - batch inference time:0.8987977504730225 s +2023-10-26 15:08:23 INFO root donut_experiment.py:66 - batch inference time:2.149653673171997 s +2023-10-26 15:08:25 INFO root donut_experiment.py:66 - batch inference time:1.9262886047363281 s +2023-10-26 15:08:37 INFO root donut_experiment.py:66 - batch inference time:12.095877170562744 s +2023-10-26 15:08:39 INFO root donut_experiment.py:66 - batch inference time:1.6247427463531494 s +2023-10-26 15:08:41 INFO root donut_experiment.py:66 - batch inference time:1.8670430183410645 s +2023-10-26 15:08:42 INFO root donut_experiment.py:66 - batch inference time:1.7066609859466553 s +2023-10-26 15:08:43 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:08:43 INFO root donut_experiment.py:76 - token_acc: 0.25653082549634276; edit_dis: 0.3073651177802486 +2023-10-26 15:08:48 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch5_step1758_lr1.740262e-05_avg_loss0.11338_token_acc0.25653_edit_dis0.30737.pth +2023-10-26 15:08:52 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1760, lr:1.739473e-05, step_mean_loss:0.04466785863041878, average_loss:0.11328764246268706), time, (train_step_time: 1.43717s, train_average_time: 0.96920s);(grad_norm_mean: nan, grad_norm_step: 1.46781) +2023-10-26 15:09:20 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1780, lr:1.731531e-05, step_mean_loss:0.00988447479903698, average_loss:0.11250996474684163), time, (train_step_time: 1.41598s, train_average_time: 0.97392s);(grad_norm_mean: nan, grad_norm_step: 1.06266) +2023-10-26 15:09:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1800, lr:1.723489e-05, step_mean_loss:0.0019381347810849547, average_loss:0.11163241035647742), time, (train_step_time: 1.45344s, train_average_time: 0.97828s);(grad_norm_mean: nan, grad_norm_step: 0.35313) +2023-10-26 15:10:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1820, lr:1.715347e-05, step_mean_loss:0.005099006928503513, average_loss:0.11060135118224515), time, (train_step_time: 1.25151s, train_average_time: 0.98227s);(grad_norm_mean: nan, grad_norm_step: 0.52428) +2023-10-26 15:10:41 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1840, lr:1.707107e-05, step_mean_loss:0.013171212747693062, average_loss:0.10996903419569287), time, (train_step_time: 1.49405s, train_average_time: 0.98605s);(grad_norm_mean: nan, grad_norm_step: 1.20588) +2023-10-26 15:11:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1860, lr:1.698769e-05, step_mean_loss:0.09073515236377716, average_loss:0.10917509762634102), time, (train_step_time: 1.43869s, train_average_time: 0.99016s);(grad_norm_mean: nan, grad_norm_step: 4.40304) +2023-10-26 15:11:36 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1880, lr:1.690336e-05, step_mean_loss:0.024830549955368042, average_loss:0.1082908757557929), time, (train_step_time: 1.54662s, train_average_time: 0.99444s);(grad_norm_mean: nan, grad_norm_step: 1.80086) +2023-10-26 15:12:04 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1900, lr:1.681808e-05, step_mean_loss:0.008461440913379192, average_loss:0.10762949375466345), time, (train_step_time: 0.85383s, train_average_time: 0.99892s);(grad_norm_mean: nan, grad_norm_step: 1.33239) +2023-10-26 15:12:31 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1920, lr:1.673186e-05, step_mean_loss:0.042853597551584244, average_loss:0.10691914366234415), time, (train_step_time: 1.62635s, train_average_time: 1.00243s);(grad_norm_mean: nan, grad_norm_step: 1.55694) +2023-10-26 15:12:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1940, lr:1.664471e-05, step_mean_loss:0.026826754212379456, average_loss:0.10599818307571546), time, (train_step_time: 1.54949s, train_average_time: 1.00649s);(grad_norm_mean: nan, grad_norm_step: 1.40742) +2023-10-26 15:13:27 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1960, lr:1.655666e-05, step_mean_loss:0.052112068980932236, average_loss:0.1053381652772612), time, (train_step_time: 1.46390s, train_average_time: 1.01038s);(grad_norm_mean: nan, grad_norm_step: 3.68372) +2023-10-26 15:13:54 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 1980, lr:1.646770e-05, step_mean_loss:0.06787209212779999, average_loss:0.10455493296060307), time, (train_step_time: 1.57514s, train_average_time: 1.01419s);(grad_norm_mean: nan, grad_norm_step: 3.35336) +2023-10-26 15:14:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 2000, lr:1.637785e-05, step_mean_loss:0.015237807296216488, average_loss:0.10382468447159045), time, (train_step_time: 1.51393s, train_average_time: 1.01711s);(grad_norm_mean: nan, grad_norm_step: 1.96372) +2023-10-26 15:14:21 INFO root donut_experiment.py:251 - experiment:nougat-base; eval, (epoch: 6, steps: 2000); +2023-10-26 15:14:25 INFO root donut_experiment.py:66 - batch inference time:3.5952329635620117 s +2023-10-26 15:14:28 INFO root donut_experiment.py:66 - batch inference time:2.7680535316467285 s +2023-10-26 15:14:30 INFO root donut_experiment.py:66 - batch inference time:1.8263609409332275 s +2023-10-26 15:14:31 INFO root donut_experiment.py:66 - batch inference time:1.4241902828216553 s +2023-10-26 15:14:33 INFO root donut_experiment.py:66 - batch inference time:2.268580913543701 s +2023-10-26 15:14:35 INFO root donut_experiment.py:66 - batch inference time:1.3699610233306885 s +2023-10-26 15:14:36 INFO root donut_experiment.py:66 - batch inference time:1.0157549381256104 s +2023-10-26 15:14:38 INFO root donut_experiment.py:66 - batch inference time:1.5965321063995361 s +2023-10-26 15:14:39 INFO root donut_experiment.py:66 - batch inference time:1.6632251739501953 s +2023-10-26 15:14:42 INFO root donut_experiment.py:66 - batch inference time:2.3759334087371826 s +2023-10-26 15:14:44 INFO root donut_experiment.py:66 - batch inference time:2.0652170181274414 s +2023-10-26 15:14:45 INFO root donut_experiment.py:66 - batch inference time:0.8574929237365723 s +2023-10-26 15:14:47 INFO root donut_experiment.py:66 - batch inference time:2.1901938915252686 s +2023-10-26 15:14:49 INFO root donut_experiment.py:66 - batch inference time:1.733518123626709 s +2023-10-26 15:14:51 INFO root donut_experiment.py:66 - batch inference time:2.7446930408477783 s +2023-10-26 15:14:53 INFO root donut_experiment.py:66 - batch inference time:1.802074670791626 s +2023-10-26 15:14:56 INFO root donut_experiment.py:66 - batch inference time:2.2440319061279297 s +2023-10-26 15:14:57 INFO root donut_experiment.py:66 - batch inference time:1.8702428340911865 s +2023-10-26 15:14:58 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:14:58 INFO root donut_experiment.py:76 - token_acc: 0.3791498603785293; edit_dis: 0.14814483389183436 +2023-10-26 15:15:04 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch6_step2000_lr1.637785e-05_avg_loss0.10382_token_acc0.37915_edit_dis0.14814.pth +2023-10-26 15:15:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 2020, lr:1.628713e-05, step_mean_loss:0.04260364547371864, average_loss:0.10304850720913417), time, (train_step_time: 1.41897s, train_average_time: 1.02096s);(grad_norm_mean: nan, grad_norm_step: 4.04899) +2023-10-26 15:15:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 6, steps: 2040, lr:1.619554e-05, step_mean_loss:0.007230571936815977, average_loss:0.10227555815602982), time, (train_step_time: 1.46551s, train_average_time: 1.02394s);(grad_norm_mean: nan, grad_norm_step: 0.40331) +2023-10-26 15:16:14 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 6, steps: 2051); +2023-10-26 15:16:19 INFO root donut_experiment.py:66 - batch inference time:3.5730011463165283 s +2023-10-26 15:16:21 INFO root donut_experiment.py:66 - batch inference time:2.714547872543335 s +2023-10-26 15:16:23 INFO root donut_experiment.py:66 - batch inference time:1.640960454940796 s +2023-10-26 15:16:24 INFO root donut_experiment.py:66 - batch inference time:1.1906623840332031 s +2023-10-26 15:16:27 INFO root donut_experiment.py:66 - batch inference time:2.2084755897521973 s +2023-10-26 15:16:28 INFO root donut_experiment.py:66 - batch inference time:1.3307194709777832 s +2023-10-26 15:16:29 INFO root donut_experiment.py:66 - batch inference time:1.0036468505859375 s +2023-10-26 15:16:31 INFO root donut_experiment.py:66 - batch inference time:1.5694000720977783 s +2023-10-26 15:16:32 INFO root donut_experiment.py:66 - batch inference time:1.632582426071167 s +2023-10-26 15:16:34 INFO root donut_experiment.py:66 - batch inference time:2.293667793273926 s +2023-10-26 15:16:37 INFO root donut_experiment.py:66 - batch inference time:2.1853137016296387 s +2023-10-26 15:16:38 INFO root donut_experiment.py:66 - batch inference time:0.8504388332366943 s +2023-10-26 15:16:40 INFO root donut_experiment.py:66 - batch inference time:2.4231526851654053 s +2023-10-26 15:16:42 INFO root donut_experiment.py:66 - batch inference time:1.7953550815582275 s +2023-10-26 15:16:54 INFO root donut_experiment.py:66 - batch inference time:12.105730772018433 s +2023-10-26 15:16:56 INFO root donut_experiment.py:66 - batch inference time:1.7026731967926025 s +2023-10-26 15:16:58 INFO root donut_experiment.py:66 - batch inference time:2.160717725753784 s +2023-10-26 15:17:00 INFO root donut_experiment.py:66 - batch inference time:1.846764326095581 s +2023-10-26 15:17:00 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:17:00 INFO root donut_experiment.py:76 - token_acc: 0.3609375; edit_dis: 0.21155490888278658 +2023-10-26 15:17:06 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch6_step2051_lr1.614480e-05_avg_loss0.10190_token_acc0.36094_edit_dis0.21155.pth +2023-10-26 15:17:18 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2060, lr:1.610310e-05, step_mean_loss:0.008545593358576298, average_loss:0.10151200764230453), time, (train_step_time: 1.45341s, train_average_time: 1.02723s);(grad_norm_mean: nan, grad_norm_step: 0.58086) +2023-10-26 15:17:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2080, lr:1.600983e-05, step_mean_loss:0.14015880227088928, average_loss:0.1008294139170735), time, (train_step_time: 0.81292s, train_average_time: 1.02995s);(grad_norm_mean: nan, grad_norm_step: 2.44004) +2023-10-26 15:18:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2100, lr:1.591572e-05, step_mean_loss:0.1266990602016449, average_loss:0.10026936835562256), time, (train_step_time: 1.42015s, train_average_time: 1.03253s);(grad_norm_mean: nan, grad_norm_step: 12.17378) +2023-10-26 15:18:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2120, lr:1.582081e-05, step_mean_loss:0.03851413354277611, average_loss:0.09974715178807701), time, (train_step_time: 0.73742s, train_average_time: 1.03444s);(grad_norm_mean: nan, grad_norm_step: 2.89073) +2023-10-26 15:19:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2140, lr:1.572509e-05, step_mean_loss:0.008161376230418682, average_loss:0.09900658560547408), time, (train_step_time: 1.42416s, train_average_time: 1.03777s);(grad_norm_mean: nan, grad_norm_step: 0.80542) +2023-10-26 15:19:29 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2160, lr:1.562859e-05, step_mean_loss:0.0357121117413044, average_loss:0.09852765795952076), time, (train_step_time: 1.43797s, train_average_time: 1.04025s);(grad_norm_mean: nan, grad_norm_step: 1.97411) +2023-10-26 15:19:57 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2180, lr:1.553131e-05, step_mean_loss:0.007080395705997944, average_loss:0.09783865066505264), time, (train_step_time: 0.78254s, train_average_time: 1.04332s);(grad_norm_mean: nan, grad_norm_step: 0.57902) +2023-10-26 15:20:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2200, lr:1.543328e-05, step_mean_loss:0.0012998144375160336, average_loss:0.09739702077712123), time, (train_step_time: 1.34958s, train_average_time: 1.04604s);(grad_norm_mean: nan, grad_norm_step: 0.08380) +2023-10-26 15:20:50 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2220, lr:1.533449e-05, step_mean_loss:0.0218821968883276, average_loss:0.09669097074968762), time, (train_step_time: 1.46922s, train_average_time: 1.04870s);(grad_norm_mean: nan, grad_norm_step: 1.86272) +2023-10-26 15:21:17 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2240, lr:1.523498e-05, step_mean_loss:0.007052186410874128, average_loss:0.09614758056936158), time, (train_step_time: 1.45558s, train_average_time: 1.05117s);(grad_norm_mean: nan, grad_norm_step: 0.43534) +2023-10-26 15:21:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2260, lr:1.513474e-05, step_mean_loss:0.008248084224760532, average_loss:0.09554030012689115), time, (train_step_time: 1.51119s, train_average_time: 1.05394s);(grad_norm_mean: nan, grad_norm_step: 1.99885) +2023-10-26 15:22:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2280, lr:1.503380e-05, step_mean_loss:0.019918693229556084, average_loss:0.09500164557396426), time, (train_step_time: 1.00253s, train_average_time: 1.05681s);(grad_norm_mean: nan, grad_norm_step: 1.86116) +2023-10-26 15:22:39 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2300, lr:1.493217e-05, step_mean_loss:0.050043053925037384, average_loss:0.09435048058077329), time, (train_step_time: 1.70056s, train_average_time: 1.05968s);(grad_norm_mean: nan, grad_norm_step: 2.16246) +2023-10-26 15:23:07 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2320, lr:1.482986e-05, step_mean_loss:0.0027521331794559956, average_loss:0.09374033729641112), time, (train_step_time: 1.49371s, train_average_time: 1.06257s);(grad_norm_mean: nan, grad_norm_step: 0.62627) +2023-10-26 15:23:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 7, steps: 2340, lr:1.472689e-05, step_mean_loss:0.008258584886789322, average_loss:0.0930569066130009), time, (train_step_time: 1.46796s, train_average_time: 1.06524s);(grad_norm_mean: nan, grad_norm_step: 1.07694) +2023-10-26 15:23:41 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 7, steps: 2344); +2023-10-26 15:23:54 INFO root donut_experiment.py:66 - batch inference time:12.292154788970947 s +2023-10-26 15:23:57 INFO root donut_experiment.py:66 - batch inference time:2.7671031951904297 s +2023-10-26 15:23:59 INFO root donut_experiment.py:66 - batch inference time:1.7322852611541748 s +2023-10-26 15:24:00 INFO root donut_experiment.py:66 - batch inference time:1.2407302856445312 s +2023-10-26 15:24:02 INFO root donut_experiment.py:66 - batch inference time:2.288686752319336 s +2023-10-26 15:24:04 INFO root donut_experiment.py:66 - batch inference time:1.3106472492218018 s +2023-10-26 15:24:05 INFO root donut_experiment.py:66 - batch inference time:1.0174980163574219 s +2023-10-26 15:24:06 INFO root donut_experiment.py:66 - batch inference time:1.6385974884033203 s +2023-10-26 15:24:08 INFO root donut_experiment.py:66 - batch inference time:1.701892614364624 s +2023-10-26 15:24:10 INFO root donut_experiment.py:66 - batch inference time:2.3396568298339844 s +2023-10-26 15:24:12 INFO root donut_experiment.py:66 - batch inference time:1.7226510047912598 s +2023-10-26 15:24:13 INFO root donut_experiment.py:66 - batch inference time:0.862891435623169 s +2023-10-26 15:24:15 INFO root donut_experiment.py:66 - batch inference time:2.3119232654571533 s +2023-10-26 15:24:17 INFO root donut_experiment.py:66 - batch inference time:1.948439121246338 s +2023-10-26 15:24:20 INFO root donut_experiment.py:66 - batch inference time:2.184774398803711 s +2023-10-26 15:24:21 INFO root donut_experiment.py:66 - batch inference time:1.684689998626709 s +2023-10-26 15:24:23 INFO root donut_experiment.py:66 - batch inference time:2.001847505569458 s +2023-10-26 15:24:25 INFO root donut_experiment.py:66 - batch inference time:1.9200844764709473 s +2023-10-26 15:24:25 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:24:25 INFO root donut_experiment.py:76 - token_acc: 0.3481382978723404; edit_dis: 0.16681425649802858 +2023-10-26 15:24:32 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch7_step2344_lr1.470621e-05_avg_loss0.09291_token_acc0.34814_edit_dis0.16681.pth +2023-10-26 15:24:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2360, lr:1.462326e-05, step_mean_loss:0.010016954503953457, average_loss:0.09243705978232802), time, (train_step_time: 1.47333s, train_average_time: 1.06729s);(grad_norm_mean: nan, grad_norm_step: 1.04670) +2023-10-26 15:25:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2380, lr:1.451900e-05, step_mean_loss:0.009938368573784828, average_loss:0.09178507221210161), time, (train_step_time: 1.37184s, train_average_time: 1.06940s);(grad_norm_mean: nan, grad_norm_step: 2.35122) +2023-10-26 15:25:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2400, lr:1.441412e-05, step_mean_loss:0.14055263996124268, average_loss:0.09143725306033351), time, (train_step_time: 1.52775s, train_average_time: 1.07200s);(grad_norm_mean: nan, grad_norm_step: 8.51355) +2023-10-26 15:26:15 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2420, lr:1.430864e-05, step_mean_loss:0.022078048437833786, average_loss:0.09092888744256444), time, (train_step_time: 1.48700s, train_average_time: 1.07467s);(grad_norm_mean: nan, grad_norm_step: 1.06424) +2023-10-26 15:26:41 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2440, lr:1.420256e-05, step_mean_loss:0.04048808664083481, average_loss:0.09038077428983238), time, (train_step_time: 1.46976s, train_average_time: 1.07656s);(grad_norm_mean: nan, grad_norm_step: 3.22662) +2023-10-26 15:27:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2460, lr:1.409590e-05, step_mean_loss:0.012841620482504368, average_loss:0.08972053976477043), time, (train_step_time: 1.47349s, train_average_time: 1.07879s);(grad_norm_mean: nan, grad_norm_step: 0.70089) +2023-10-26 15:27:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2480, lr:1.398869e-05, step_mean_loss:0.12158260494470596, average_loss:0.0892137555078901), time, (train_step_time: 1.00169s, train_average_time: 1.08109s);(grad_norm_mean: nan, grad_norm_step: 6.66111) +2023-10-26 15:28:02 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2500, lr:1.388092e-05, step_mean_loss:0.00391597393900156, average_loss:0.08863153941518394), time, (train_step_time: 1.43918s, train_average_time: 1.08320s);(grad_norm_mean: nan, grad_norm_step: 0.35116) +2023-10-26 15:28:29 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2520, lr:1.377262e-05, step_mean_loss:0.0048360927030444145, average_loss:0.08810131148385976), time, (train_step_time: 1.21871s, train_average_time: 1.08528s);(grad_norm_mean: nan, grad_norm_step: 0.53268) +2023-10-26 15:28:56 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2540, lr:1.366380e-05, step_mean_loss:0.008632609620690346, average_loss:0.08756229332330825), time, (train_step_time: 1.32269s, train_average_time: 1.08741s);(grad_norm_mean: nan, grad_norm_step: 1.09895) +2023-10-26 15:29:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2560, lr:1.355448e-05, step_mean_loss:0.0023661013692617416, average_loss:0.08696765936339261), time, (train_step_time: 1.45984s, train_average_time: 1.08942s);(grad_norm_mean: nan, grad_norm_step: 0.53307) +2023-10-26 15:29:50 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2580, lr:1.344467e-05, step_mean_loss:0.0010263840667903423, average_loss:0.08656028368887753), time, (train_step_time: 1.44535s, train_average_time: 1.09158s);(grad_norm_mean: nan, grad_norm_step: 0.05761) +2023-10-26 15:30:18 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2600, lr:1.333439e-05, step_mean_loss:0.0012946156784892082, average_loss:0.08599181376958417), time, (train_step_time: 1.43280s, train_average_time: 1.09367s);(grad_norm_mean: nan, grad_norm_step: 0.07248) +2023-10-26 15:30:45 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 8, steps: 2620, lr:1.322364e-05, step_mean_loss:0.0005932372296229005, average_loss:0.08545413400267139), time, (train_step_time: 1.26890s, train_average_time: 1.09564s);(grad_norm_mean: nan, grad_norm_step: 0.05266) +2023-10-26 15:31:08 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 8, steps: 2637); +2023-10-26 15:31:12 INFO root donut_experiment.py:66 - batch inference time:3.3740768432617188 s +2023-10-26 15:31:15 INFO root donut_experiment.py:66 - batch inference time:2.8149304389953613 s +2023-10-26 15:31:16 INFO root donut_experiment.py:66 - batch inference time:1.7314834594726562 s +2023-10-26 15:31:18 INFO root donut_experiment.py:66 - batch inference time:1.215928554534912 s +2023-10-26 15:31:20 INFO root donut_experiment.py:66 - batch inference time:2.4165618419647217 s +2023-10-26 15:31:21 INFO root donut_experiment.py:66 - batch inference time:1.2735953330993652 s +2023-10-26 15:31:22 INFO root donut_experiment.py:66 - batch inference time:1.008598804473877 s +2023-10-26 15:31:24 INFO root donut_experiment.py:66 - batch inference time:1.568319320678711 s +2023-10-26 15:31:26 INFO root donut_experiment.py:66 - batch inference time:1.6208136081695557 s +2023-10-26 15:31:28 INFO root donut_experiment.py:66 - batch inference time:2.277200222015381 s +2023-10-26 15:31:30 INFO root donut_experiment.py:66 - batch inference time:2.5231266021728516 s +2023-10-26 15:31:31 INFO root donut_experiment.py:66 - batch inference time:0.867490291595459 s +2023-10-26 15:31:34 INFO root donut_experiment.py:66 - batch inference time:2.4632418155670166 s +2023-10-26 15:31:35 INFO root donut_experiment.py:66 - batch inference time:0.8013710975646973 s +2023-10-26 15:31:37 INFO root donut_experiment.py:66 - batch inference time:2.2487454414367676 s +2023-10-26 15:31:39 INFO root donut_experiment.py:66 - batch inference time:1.6198458671569824 s +2023-10-26 15:31:41 INFO root donut_experiment.py:66 - batch inference time:2.0759453773498535 s +2023-10-26 15:31:43 INFO root donut_experiment.py:66 - batch inference time:1.8593571186065674 s +2023-10-26 15:31:43 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:31:43 INFO root donut_experiment.py:76 - token_acc: 0.36130030959752324; edit_dis: 0.13664641988863058 +2023-10-26 15:31:49 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch8_step2637_lr1.312916e-05_avg_loss0.08506_token_acc0.36130_edit_dis0.13665.pth +2023-10-26 15:31:54 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2640, lr:1.311246e-05, step_mean_loss:0.00304964417591691, average_loss:0.0850001278973099), time, (train_step_time: 1.49646s, train_average_time: 1.09766s);(grad_norm_mean: nan, grad_norm_step: 1.42859) +2023-10-26 15:32:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2660, lr:1.300085e-05, step_mean_loss:0.03713580220937729, average_loss:0.08442995651062496), time, (train_step_time: 1.46449s, train_average_time: 1.09961s);(grad_norm_mean: nan, grad_norm_step: 2.65380) +2023-10-26 15:32:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2680, lr:1.288882e-05, step_mean_loss:0.023111894726753235, average_loss:0.08393454208848225), time, (train_step_time: 1.33544s, train_average_time: 1.10176s);(grad_norm_mean: nan, grad_norm_step: 1.82259) +2023-10-26 15:33:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2700, lr:1.277640e-05, step_mean_loss:0.013363436795771122, average_loss:0.08341652912300627), time, (train_step_time: 0.93125s, train_average_time: 1.10309s);(grad_norm_mean: nan, grad_norm_step: 1.23203) +2023-10-26 15:33:42 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2720, lr:1.266360e-05, step_mean_loss:0.007857194170355797, average_loss:0.08294770802284641), time, (train_step_time: 1.50529s, train_average_time: 1.10520s);(grad_norm_mean: nan, grad_norm_step: 0.99226) +2023-10-26 15:34:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2740, lr:1.255043e-05, step_mean_loss:0.030021920800209045, average_loss:0.08247722800971152), time, (train_step_time: 1.34473s, train_average_time: 1.10668s);(grad_norm_mean: nan, grad_norm_step: 6.52212) +2023-10-26 15:34:36 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2760, lr:1.243691e-05, step_mean_loss:0.004767917562276125, average_loss:0.08197414507194481), time, (train_step_time: 1.45796s, train_average_time: 1.10861s);(grad_norm_mean: nan, grad_norm_step: 0.67994) +2023-10-26 15:34:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2780, lr:1.232305e-05, step_mean_loss:0.012315619736909866, average_loss:0.08155640030593093), time, (train_step_time: 0.70810s, train_average_time: 1.10848s);(grad_norm_mean: nan, grad_norm_step: 1.60211) +2023-10-26 15:35:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2800, lr:1.220888e-05, step_mean_loss:0.12725889682769775, average_loss:0.08113167448190195), time, (train_step_time: 0.58373s, train_average_time: 1.10496s);(grad_norm_mean: nan, grad_norm_step: 70.96741) +2023-10-26 15:35:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2820, lr:1.209440e-05, step_mean_loss:0.01165765430778265, average_loss:0.0806907139014416), time, (train_step_time: 1.34059s, train_average_time: 1.10222s);(grad_norm_mean: nan, grad_norm_step: 0.86837) +2023-10-26 15:35:40 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2840, lr:1.197964e-05, step_mean_loss:0.013144728727638721, average_loss:0.08030406953016289), time, (train_step_time: 0.61968s, train_average_time: 1.09977s);(grad_norm_mean: nan, grad_norm_step: 0.83327) +2023-10-26 15:35:55 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2860, lr:1.186460e-05, step_mean_loss:0.018058406189084053, average_loss:0.0799359945855883), time, (train_step_time: 1.15234s, train_average_time: 1.09762s);(grad_norm_mean: nan, grad_norm_step: 1.34959) +2023-10-26 15:36:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2880, lr:1.174931e-05, step_mean_loss:0.0032990549225360155, average_loss:0.07946590308953193), time, (train_step_time: 0.59466s, train_average_time: 1.09495s);(grad_norm_mean: nan, grad_norm_step: 0.33751) +2023-10-26 15:36:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2900, lr:1.163377e-05, step_mean_loss:0.0029652700759470463, average_loss:0.07906944819015013), time, (train_step_time: 0.56110s, train_average_time: 1.09287s);(grad_norm_mean: nan, grad_norm_step: 0.22264) +2023-10-26 15:36:39 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 9, steps: 2920, lr:1.151802e-05, step_mean_loss:0.0005198617582209408, average_loss:0.0786814497880012), time, (train_step_time: 0.55112s, train_average_time: 1.09012s);(grad_norm_mean: nan, grad_norm_step: 0.03943) +2023-10-26 15:36:45 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 9, steps: 2930); +2023-10-26 15:36:49 INFO root donut_experiment.py:66 - batch inference time:2.9390013217926025 s +2023-10-26 15:36:51 INFO root donut_experiment.py:66 - batch inference time:2.3512511253356934 s +2023-10-26 15:36:53 INFO root donut_experiment.py:66 - batch inference time:1.4690682888031006 s +2023-10-26 15:36:54 INFO root donut_experiment.py:66 - batch inference time:1.0131900310516357 s +2023-10-26 15:36:56 INFO root donut_experiment.py:66 - batch inference time:1.9072229862213135 s +2023-10-26 15:36:57 INFO root donut_experiment.py:66 - batch inference time:1.0945217609405518 s +2023-10-26 15:36:58 INFO root donut_experiment.py:66 - batch inference time:0.8694400787353516 s +2023-10-26 15:36:59 INFO root donut_experiment.py:66 - batch inference time:1.3058927059173584 s +2023-10-26 15:37:01 INFO root donut_experiment.py:66 - batch inference time:1.4120314121246338 s +2023-10-26 15:37:03 INFO root donut_experiment.py:66 - batch inference time:2.234807252883911 s +2023-10-26 15:37:05 INFO root donut_experiment.py:66 - batch inference time:1.8869171142578125 s +2023-10-26 15:37:06 INFO root donut_experiment.py:66 - batch inference time:0.780698299407959 s +2023-10-26 15:37:08 INFO root donut_experiment.py:66 - batch inference time:1.912482738494873 s +2023-10-26 15:37:09 INFO root donut_experiment.py:66 - batch inference time:1.594322681427002 s +2023-10-26 15:37:11 INFO root donut_experiment.py:66 - batch inference time:1.8652937412261963 s +2023-10-26 15:37:13 INFO root donut_experiment.py:66 - batch inference time:1.4744346141815186 s +2023-10-26 15:37:14 INFO root donut_experiment.py:66 - batch inference time:1.66062331199646 s +2023-10-26 15:37:16 INFO root donut_experiment.py:66 - batch inference time:1.5943326950073242 s +2023-10-26 15:37:16 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:37:16 INFO root donut_experiment.py:76 - token_acc: 0.40632832080200504; edit_dis: 0.11767084577579118 +2023-10-26 15:37:21 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch9_step2930_lr1.146006e-05_avg_loss0.07845_token_acc0.40633_edit_dis0.11767.pth +2023-10-26 15:37:28 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 2940, lr:1.140205e-05, step_mean_loss:0.004357266705483198, average_loss:0.07820397479923226), time, (train_step_time: 0.59494s, train_average_time: 1.08671s);(grad_norm_mean: nan, grad_norm_step: 0.74947) +2023-10-26 15:37:40 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 2960, lr:1.128589e-05, step_mean_loss:0.005129431374371052, average_loss:0.07778343163949152), time, (train_step_time: 0.55984s, train_average_time: 1.08328s);(grad_norm_mean: nan, grad_norm_step: 0.61565) +2023-10-26 15:37:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 2980, lr:1.116955e-05, step_mean_loss:0.003814815543591976, average_loss:0.07735319092281139), time, (train_step_time: 0.58756s, train_average_time: 1.07995s);(grad_norm_mean: nan, grad_norm_step: 0.56184) +2023-10-26 15:38:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3000, lr:1.105306e-05, step_mean_loss:0.004479314666241407, average_loss:0.07693971584330332), time, (train_step_time: 0.63207s, train_average_time: 1.07674s);(grad_norm_mean: nan, grad_norm_step: 0.26731) +2023-10-26 15:38:03 INFO root donut_experiment.py:251 - experiment:nougat-base; eval, (epoch: 10, steps: 3000); +2023-10-26 15:38:07 INFO root donut_experiment.py:66 - batch inference time:3.0353474617004395 s +2023-10-26 15:38:10 INFO root donut_experiment.py:66 - batch inference time:2.365455389022827 s +2023-10-26 15:38:11 INFO root donut_experiment.py:66 - batch inference time:1.4819228649139404 s +2023-10-26 15:38:12 INFO root donut_experiment.py:66 - batch inference time:1.0179264545440674 s +2023-10-26 15:38:14 INFO root donut_experiment.py:66 - batch inference time:1.9237728118896484 s +2023-10-26 15:38:15 INFO root donut_experiment.py:66 - batch inference time:1.1085388660430908 s +2023-10-26 15:38:16 INFO root donut_experiment.py:66 - batch inference time:0.8661291599273682 s +2023-10-26 15:38:17 INFO root donut_experiment.py:66 - batch inference time:1.3576314449310303 s +2023-10-26 15:38:19 INFO root donut_experiment.py:66 - batch inference time:1.4219379425048828 s +2023-10-26 15:38:21 INFO root donut_experiment.py:66 - batch inference time:2.010690450668335 s +2023-10-26 15:38:23 INFO root donut_experiment.py:66 - batch inference time:1.8879106044769287 s +2023-10-26 15:38:24 INFO root donut_experiment.py:66 - batch inference time:0.7223770618438721 s +2023-10-26 15:38:26 INFO root donut_experiment.py:66 - batch inference time:2.098186492919922 s +2023-10-26 15:38:27 INFO root donut_experiment.py:66 - batch inference time:1.5772864818572998 s +2023-10-26 15:38:29 INFO root donut_experiment.py:66 - batch inference time:2.1050522327423096 s +2023-10-26 15:38:31 INFO root donut_experiment.py:66 - batch inference time:1.4656822681427002 s +2023-10-26 15:38:33 INFO root donut_experiment.py:66 - batch inference time:1.7819726467132568 s +2023-10-26 15:38:34 INFO root donut_experiment.py:66 - batch inference time:1.602367639541626 s +2023-10-26 15:38:35 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:38:35 INFO root donut_experiment.py:76 - token_acc: 0.346093504863508; edit_dis: 0.09988834762579075 +2023-10-26 15:38:40 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch10_step3000_lr1.105306e-05_avg_loss0.07694_token_acc0.34609_edit_dis0.09989.pth +2023-10-26 15:38:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3020, lr:1.093641e-05, step_mean_loss:0.050157856196165085, average_loss:0.07652830076329228), time, (train_step_time: 0.57242s, train_average_time: 1.07339s);(grad_norm_mean: nan, grad_norm_step: 3.51842) +2023-10-26 15:39:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3040, lr:1.081964e-05, step_mean_loss:0.0034478134475648403, average_loss:0.07617036080332822), time, (train_step_time: 0.54900s, train_average_time: 1.07024s);(grad_norm_mean: nan, grad_norm_step: 0.44525) +2023-10-26 15:39:16 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3060, lr:1.070276e-05, step_mean_loss:0.02413705736398697, average_loss:0.07593471758334203), time, (train_step_time: 0.62514s, train_average_time: 1.06725s);(grad_norm_mean: nan, grad_norm_step: 2.58008) +2023-10-26 15:39:27 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3080, lr:1.058578e-05, step_mean_loss:0.023137295618653297, average_loss:0.07552932356099648), time, (train_step_time: 0.64822s, train_average_time: 1.06412s);(grad_norm_mean: nan, grad_norm_step: 1.11326) +2023-10-26 15:39:39 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3100, lr:1.046872e-05, step_mean_loss:0.0554107241332531, average_loss:0.07513100395426003), time, (train_step_time: 0.56716s, train_average_time: 1.06096s);(grad_norm_mean: nan, grad_norm_step: 10.65080) +2023-10-26 15:39:50 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3120, lr:1.035160e-05, step_mean_loss:0.03794033080339432, average_loss:0.07470709174574436), time, (train_step_time: 0.57247s, train_average_time: 1.05786s);(grad_norm_mean: nan, grad_norm_step: 1.86714) +2023-10-26 15:40:02 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3140, lr:1.023443e-05, step_mean_loss:0.0016498323529958725, average_loss:0.07439520441614937), time, (train_step_time: 0.56120s, train_average_time: 1.05483s);(grad_norm_mean: nan, grad_norm_step: 0.16382) +2023-10-26 15:40:13 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3160, lr:1.011722e-05, step_mean_loss:0.0009686744888313115, average_loss:0.0741695992829165), time, (train_step_time: 0.53683s, train_average_time: 1.05163s);(grad_norm_mean: nan, grad_norm_step: 0.10760) +2023-10-26 15:40:25 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3180, lr:1.000000e-05, step_mean_loss:0.0065787010826170444, average_loss:0.07376504133273053), time, (train_step_time: 0.58453s, train_average_time: 1.04882s);(grad_norm_mean: nan, grad_norm_step: 1.68320) +2023-10-26 15:40:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3200, lr:9.882779e-06, step_mean_loss:0.009472189471125603, average_loss:0.07336033159830095), time, (train_step_time: 0.56554s, train_average_time: 1.04584s);(grad_norm_mean: nan, grad_norm_step: 0.65445) +2023-10-26 15:40:48 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 10, steps: 3220, lr:9.765574e-06, step_mean_loss:0.01719321310520172, average_loss:0.07297029585492315), time, (train_step_time: 0.52481s, train_average_time: 1.04295s);(grad_norm_mean: nan, grad_norm_step: 1.70726) +2023-10-26 15:40:50 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 10, steps: 3223); +2023-10-26 15:40:54 INFO root donut_experiment.py:66 - batch inference time:3.074943780899048 s +2023-10-26 15:40:56 INFO root donut_experiment.py:66 - batch inference time:2.319822311401367 s +2023-10-26 15:40:58 INFO root donut_experiment.py:66 - batch inference time:1.4328596591949463 s +2023-10-26 15:40:59 INFO root donut_experiment.py:66 - batch inference time:1.0202910900115967 s +2023-10-26 15:41:01 INFO root donut_experiment.py:66 - batch inference time:1.989196538925171 s +2023-10-26 15:41:02 INFO root donut_experiment.py:66 - batch inference time:1.111884355545044 s +2023-10-26 15:41:03 INFO root donut_experiment.py:66 - batch inference time:0.8643090724945068 s +2023-10-26 15:41:04 INFO root donut_experiment.py:66 - batch inference time:1.3627395629882812 s +2023-10-26 15:41:06 INFO root donut_experiment.py:66 - batch inference time:1.5996136665344238 s +2023-10-26 15:41:08 INFO root donut_experiment.py:66 - batch inference time:1.9939746856689453 s +2023-10-26 15:41:10 INFO root donut_experiment.py:66 - batch inference time:1.8583166599273682 s +2023-10-26 15:41:10 INFO root donut_experiment.py:66 - batch inference time:0.7464609146118164 s +2023-10-26 15:41:12 INFO root donut_experiment.py:66 - batch inference time:2.0081136226654053 s +2023-10-26 15:41:14 INFO root donut_experiment.py:66 - batch inference time:1.5985586643218994 s +2023-10-26 15:41:16 INFO root donut_experiment.py:66 - batch inference time:1.7563741207122803 s +2023-10-26 15:41:17 INFO root donut_experiment.py:66 - batch inference time:1.4257993698120117 s +2023-10-26 15:41:19 INFO root donut_experiment.py:66 - batch inference time:1.8068625926971436 s +2023-10-26 15:41:21 INFO root donut_experiment.py:66 - batch inference time:1.6164464950561523 s +2023-10-26 15:41:21 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:41:21 INFO root donut_experiment.py:76 - token_acc: 0.4299182903834067; edit_dis: 0.09637272464658514 +2023-10-26 15:41:26 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch10_step3223_lr9.747996e-06_avg_loss0.07291_token_acc0.42992_edit_dis0.09637.pth +2023-10-26 15:41:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3240, lr:9.648402e-06, step_mean_loss:0.005271051079034805, average_loss:0.07257737967045637), time, (train_step_time: 0.56572s, train_average_time: 1.04001s);(grad_norm_mean: nan, grad_norm_step: 0.51342) +2023-10-26 15:41:48 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3260, lr:9.531277e-06, step_mean_loss:0.0019790141377598047, average_loss:0.07217345355005417), time, (train_step_time: 0.56450s, train_average_time: 1.03708s);(grad_norm_mean: nan, grad_norm_step: 0.31031) +2023-10-26 15:42:00 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3280, lr:9.414217e-06, step_mean_loss:0.0006153426948003471, average_loss:0.07179939423748492), time, (train_step_time: 0.59736s, train_average_time: 1.03431s);(grad_norm_mean: nan, grad_norm_step: 0.04015) +2023-10-26 15:42:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3300, lr:9.297238e-06, step_mean_loss:0.015104847960174084, average_loss:0.07140368115107941), time, (train_step_time: 0.56496s, train_average_time: 1.03156s);(grad_norm_mean: nan, grad_norm_step: 4.00713) +2023-10-26 15:42:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3320, lr:9.180355e-06, step_mean_loss:0.003253814298659563, average_loss:0.07102114641180729), time, (train_step_time: 0.60288s, train_average_time: 1.02885s);(grad_norm_mean: nan, grad_norm_step: 0.26068) +2023-10-26 15:42:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3340, lr:9.063585e-06, step_mean_loss:0.015005898661911488, average_loss:0.07072611909459814), time, (train_step_time: 0.53928s, train_average_time: 1.02619s);(grad_norm_mean: nan, grad_norm_step: 1.71634) +2023-10-26 15:42:46 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3360, lr:8.946944e-06, step_mean_loss:0.01125512458384037, average_loss:0.07038615275815097), time, (train_step_time: 0.58664s, train_average_time: 1.02348s);(grad_norm_mean: nan, grad_norm_step: 0.79359) +2023-10-26 15:42:57 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3380, lr:8.830447e-06, step_mean_loss:0.0006324743735603988, average_loss:0.07001591865690139), time, (train_step_time: 0.56765s, train_average_time: 1.02081s);(grad_norm_mean: nan, grad_norm_step: 0.05285) +2023-10-26 15:43:09 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3400, lr:8.714111e-06, step_mean_loss:0.00454572681337595, average_loss:0.069672722302224), time, (train_step_time: 0.52969s, train_average_time: 1.01826s);(grad_norm_mean: nan, grad_norm_step: 0.70538) +2023-10-26 15:43:21 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3420, lr:8.597951e-06, step_mean_loss:0.004899646155536175, average_loss:0.06929523859753689), time, (train_step_time: 0.59302s, train_average_time: 1.01568s);(grad_norm_mean: nan, grad_norm_step: 0.52639) +2023-10-26 15:43:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3440, lr:8.481985e-06, step_mean_loss:0.003742419183254242, average_loss:0.06893951632736453), time, (train_step_time: 0.57497s, train_average_time: 1.01314s);(grad_norm_mean: nan, grad_norm_step: 0.31202) +2023-10-26 15:43:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3460, lr:8.366226e-06, step_mean_loss:0.00497629726305604, average_loss:0.06863181575323755), time, (train_step_time: 0.54689s, train_average_time: 1.01065s);(grad_norm_mean: nan, grad_norm_step: 1.17855) +2023-10-26 15:43:56 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3480, lr:8.250693e-06, step_mean_loss:0.006332114804536104, average_loss:0.06827276786278308), time, (train_step_time: 0.60255s, train_average_time: 1.00814s);(grad_norm_mean: nan, grad_norm_step: 0.77139) +2023-10-26 15:44:07 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 11, steps: 3500, lr:8.135399e-06, step_mean_loss:0.0014325155643746257, average_loss:0.06793318414420355), time, (train_step_time: 0.56261s, train_average_time: 1.00579s);(grad_norm_mean: nan, grad_norm_step: 0.16051) +2023-10-26 15:44:17 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 11, steps: 3516); +2023-10-26 15:44:21 INFO root donut_experiment.py:66 - batch inference time:3.106025218963623 s +2023-10-26 15:44:23 INFO root donut_experiment.py:66 - batch inference time:2.3375542163848877 s +2023-10-26 15:44:25 INFO root donut_experiment.py:66 - batch inference time:1.4783072471618652 s +2023-10-26 15:44:26 INFO root donut_experiment.py:66 - batch inference time:0.972416877746582 s +2023-10-26 15:44:28 INFO root donut_experiment.py:66 - batch inference time:2.1712944507598877 s +2023-10-26 15:44:29 INFO root donut_experiment.py:66 - batch inference time:1.1016438007354736 s +2023-10-26 15:44:30 INFO root donut_experiment.py:66 - batch inference time:0.8110435009002686 s +2023-10-26 15:44:31 INFO root donut_experiment.py:66 - batch inference time:1.3295514583587646 s +2023-10-26 15:44:33 INFO root donut_experiment.py:66 - batch inference time:1.514298677444458 s +2023-10-26 15:44:35 INFO root donut_experiment.py:66 - batch inference time:2.0926787853240967 s +2023-10-26 15:44:37 INFO root donut_experiment.py:66 - batch inference time:1.850998878479004 s +2023-10-26 15:44:38 INFO root donut_experiment.py:66 - batch inference time:0.8196916580200195 s +2023-10-26 15:44:39 INFO root donut_experiment.py:66 - batch inference time:1.8466777801513672 s +2023-10-26 15:44:41 INFO root donut_experiment.py:66 - batch inference time:1.585484266281128 s +2023-10-26 15:44:43 INFO root donut_experiment.py:66 - batch inference time:2.11431622505188 s +2023-10-26 15:44:45 INFO root donut_experiment.py:66 - batch inference time:1.5077388286590576 s +2023-10-26 15:44:47 INFO root donut_experiment.py:66 - batch inference time:1.6840670108795166 s +2023-10-26 15:44:48 INFO root donut_experiment.py:66 - batch inference time:1.6380500793457031 s +2023-10-26 15:44:48 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:44:48 INFO root donut_experiment.py:76 - token_acc: 0.3826578699340245; edit_dis: 0.10361103466081092 +2023-10-26 15:44:53 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch11_step3516_lr8.043349e-06_avg_loss0.06766_token_acc0.38266_edit_dis0.10361.pth +2023-10-26 15:44:57 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3520, lr:8.020362e-06, step_mean_loss:0.0009686020785011351, average_loss:0.06758682475821264), time, (train_step_time: 0.73110s, train_average_time: 1.00368s);(grad_norm_mean: nan, grad_norm_step: 0.10438) +2023-10-26 15:45:15 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3540, lr:7.905597e-06, step_mean_loss:0.015111486427485943, average_loss:0.06725712194420956), time, (train_step_time: 0.56497s, train_average_time: 1.00288s);(grad_norm_mean: nan, grad_norm_step: 1.57573) +2023-10-26 15:45:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3560, lr:7.791120e-06, step_mean_loss:0.0004816934815607965, average_loss:0.06694776305945667), time, (train_step_time: 0.58060s, train_average_time: 1.00037s);(grad_norm_mean: nan, grad_norm_step: 0.09382) +2023-10-26 15:45:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3580, lr:7.676946e-06, step_mean_loss:0.09183253347873688, average_loss:0.0666357473217929), time, (train_step_time: 0.63075s, train_average_time: 0.99803s);(grad_norm_mean: nan, grad_norm_step: 7.58447) +2023-10-26 15:45:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3600, lr:7.563092e-06, step_mean_loss:0.015620698221027851, average_loss:0.06629467381754593), time, (train_step_time: 0.62666s, train_average_time: 0.99572s);(grad_norm_mean: nan, grad_norm_step: 0.90113) +2023-10-26 15:46:01 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3620, lr:7.449572e-06, step_mean_loss:8.522503048880026e-05, average_loss:0.06595480836090271), time, (train_step_time: 0.56182s, train_average_time: 0.99349s);(grad_norm_mean: nan, grad_norm_step: 0.00849) +2023-10-26 15:46:13 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3640, lr:7.336403e-06, step_mean_loss:0.0052633630111813545, average_loss:0.06563791752059742), time, (train_step_time: 0.62898s, train_average_time: 0.99122s);(grad_norm_mean: nan, grad_norm_step: 0.41973) +2023-10-26 15:46:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3660, lr:7.223600e-06, step_mean_loss:0.0001268827763851732, average_loss:0.06530890863807562), time, (train_step_time: 0.55418s, train_average_time: 0.98899s);(grad_norm_mean: nan, grad_norm_step: 0.02877) +2023-10-26 15:46:36 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3680, lr:7.111178e-06, step_mean_loss:0.00105321838054806, average_loss:0.06498967812763112), time, (train_step_time: 0.54743s, train_average_time: 0.98670s);(grad_norm_mean: nan, grad_norm_step: 0.31585) +2023-10-26 15:46:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3700, lr:6.999154e-06, step_mean_loss:0.0006931375246495008, average_loss:0.06467686310440193), time, (train_step_time: 0.54815s, train_average_time: 0.98441s);(grad_norm_mean: nan, grad_norm_step: 0.17444) +2023-10-26 15:46:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3720, lr:6.887541e-06, step_mean_loss:0.007384999189525843, average_loss:0.06434991273391823), time, (train_step_time: 0.58444s, train_average_time: 0.98219s);(grad_norm_mean: nan, grad_norm_step: 1.85871) +2023-10-26 15:47:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3740, lr:6.776357e-06, step_mean_loss:0.001123822177760303, average_loss:0.06408229714920295), time, (train_step_time: 0.55195s, train_average_time: 0.98003s);(grad_norm_mean: nan, grad_norm_step: 0.14705) +2023-10-26 15:47:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3760, lr:6.665615e-06, step_mean_loss:0.000447617843747139, average_loss:0.06377307745657124), time, (train_step_time: 0.57843s, train_average_time: 0.97791s);(grad_norm_mean: nan, grad_norm_step: 0.03108) +2023-10-26 15:47:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3780, lr:6.555331e-06, step_mean_loss:0.0002426256105536595, average_loss:0.06348430022250877), time, (train_step_time: 0.65390s, train_average_time: 0.97578s);(grad_norm_mean: nan, grad_norm_step: 0.03282) +2023-10-26 15:47:45 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 12, steps: 3800, lr:6.445521e-06, step_mean_loss:0.001113711972720921, average_loss:0.06320441157254734), time, (train_step_time: 0.56637s, train_average_time: 0.97380s);(grad_norm_mean: nan, grad_norm_step: 0.10708) +2023-10-26 15:47:51 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 12, steps: 3809); +2023-10-26 15:47:55 INFO root donut_experiment.py:66 - batch inference time:3.014953136444092 s +2023-10-26 15:47:57 INFO root donut_experiment.py:66 - batch inference time:2.3253326416015625 s +2023-10-26 15:47:58 INFO root donut_experiment.py:66 - batch inference time:1.4293339252471924 s +2023-10-26 15:48:00 INFO root donut_experiment.py:66 - batch inference time:1.078824758529663 s +2023-10-26 15:48:01 INFO root donut_experiment.py:66 - batch inference time:1.9004414081573486 s +2023-10-26 15:48:03 INFO root donut_experiment.py:66 - batch inference time:1.113821029663086 s +2023-10-26 15:48:03 INFO root donut_experiment.py:66 - batch inference time:0.8560712337493896 s +2023-10-26 15:48:05 INFO root donut_experiment.py:66 - batch inference time:1.3663160800933838 s +2023-10-26 15:48:06 INFO root donut_experiment.py:66 - batch inference time:1.4210255146026611 s +2023-10-26 15:48:08 INFO root donut_experiment.py:66 - batch inference time:2.0556414127349854 s +2023-10-26 15:48:11 INFO root donut_experiment.py:66 - batch inference time:2.3974125385284424 s +2023-10-26 15:48:12 INFO root donut_experiment.py:66 - batch inference time:0.7379999160766602 s +2023-10-26 15:48:13 INFO root donut_experiment.py:66 - batch inference time:1.8235459327697754 s +2023-10-26 15:48:15 INFO root donut_experiment.py:66 - batch inference time:1.7838833332061768 s +2023-10-26 15:48:17 INFO root donut_experiment.py:66 - batch inference time:1.9072208404541016 s +2023-10-26 15:48:19 INFO root donut_experiment.py:66 - batch inference time:1.6187117099761963 s +2023-10-26 15:48:21 INFO root donut_experiment.py:66 - batch inference time:1.7731149196624756 s +2023-10-26 15:48:22 INFO root donut_experiment.py:66 - batch inference time:1.4385740756988525 s +2023-10-26 15:48:22 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:48:22 INFO root donut_experiment.py:76 - token_acc: 0.38163201985727585; edit_dis: 0.12366133554530112 +2023-10-26 15:48:28 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch12_step3809_lr6.396265e-06_avg_loss0.06307_token_acc0.38163_edit_dis0.12366.pth +2023-10-26 15:48:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3820, lr:6.336200e-06, step_mean_loss:0.0004714750684797764, average_loss:0.06289392631853215), time, (train_step_time: 0.63457s, train_average_time: 0.97180s);(grad_norm_mean: nan, grad_norm_step: 0.03299) +2023-10-26 15:48:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3840, lr:6.227381e-06, step_mean_loss:0.00019418325973674655, average_loss:0.06265296507898105), time, (train_step_time: 0.53456s, train_average_time: 0.96980s);(grad_norm_mean: nan, grad_norm_step: 0.02460) +2023-10-26 15:48:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3860, lr:6.119081e-06, step_mean_loss:0.00015980005264282227, average_loss:0.06234441766586023), time, (train_step_time: 0.54626s, train_average_time: 0.96770s);(grad_norm_mean: nan, grad_norm_step: 0.02834) +2023-10-26 15:49:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3880, lr:6.011315e-06, step_mean_loss:0.0013791491510346532, average_loss:0.06208743191004707), time, (train_step_time: 0.54941s, train_average_time: 0.96577s);(grad_norm_mean: nan, grad_norm_step: 0.15776) +2023-10-26 15:49:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3900, lr:5.904096e-06, step_mean_loss:0.016577310860157013, average_loss:0.061828587368354185), time, (train_step_time: 0.53069s, train_average_time: 0.96374s);(grad_norm_mean: nan, grad_norm_step: 3.60162) +2023-10-26 15:49:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3920, lr:5.797441e-06, step_mean_loss:0.0019514306914061308, average_loss:0.06155047126274402), time, (train_step_time: 0.59263s, train_average_time: 0.96165s);(grad_norm_mean: nan, grad_norm_step: 1.17950) +2023-10-26 15:49:45 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3940, lr:5.691363e-06, step_mean_loss:0.00274052657186985, average_loss:0.06126009916119868), time, (train_step_time: 0.65166s, train_average_time: 0.95980s);(grad_norm_mean: nan, grad_norm_step: 0.43682) +2023-10-26 15:49:56 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3960, lr:5.585876e-06, step_mean_loss:0.002419867552816868, average_loss:0.06099686077595099), time, (train_step_time: 0.62605s, train_average_time: 0.95783s);(grad_norm_mean: nan, grad_norm_step: 0.76463) +2023-10-26 15:50:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 3980, lr:5.480997e-06, step_mean_loss:0.0014699496096000075, average_loss:0.060723670060946665), time, (train_step_time: 0.56898s, train_average_time: 0.95592s);(grad_norm_mean: nan, grad_norm_step: 0.22470) +2023-10-26 15:50:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4000, lr:5.376738e-06, step_mean_loss:0.000996480928733945, average_loss:0.0604350515423339), time, (train_step_time: 0.57542s, train_average_time: 0.95400s);(grad_norm_mean: nan, grad_norm_step: 0.08408) +2023-10-26 15:50:19 INFO root donut_experiment.py:251 - experiment:nougat-base; eval, (epoch: 13, steps: 4000); +2023-10-26 15:50:23 INFO root donut_experiment.py:66 - batch inference time:3.2001540660858154 s +2023-10-26 15:50:26 INFO root donut_experiment.py:66 - batch inference time:2.4812376499176025 s +2023-10-26 15:50:27 INFO root donut_experiment.py:66 - batch inference time:1.4696273803710938 s +2023-10-26 15:50:28 INFO root donut_experiment.py:66 - batch inference time:1.0366301536560059 s +2023-10-26 15:50:30 INFO root donut_experiment.py:66 - batch inference time:1.9125869274139404 s +2023-10-26 15:50:31 INFO root donut_experiment.py:66 - batch inference time:1.139012336730957 s +2023-10-26 15:50:32 INFO root donut_experiment.py:66 - batch inference time:0.8204319477081299 s +2023-10-26 15:50:34 INFO root donut_experiment.py:66 - batch inference time:1.3153059482574463 s +2023-10-26 15:50:35 INFO root donut_experiment.py:66 - batch inference time:1.415442943572998 s +2023-10-26 15:50:37 INFO root donut_experiment.py:66 - batch inference time:1.9878652095794678 s +2023-10-26 15:50:39 INFO root donut_experiment.py:66 - batch inference time:2.0328965187072754 s +2023-10-26 15:50:40 INFO root donut_experiment.py:66 - batch inference time:0.800119161605835 s +2023-10-26 15:50:42 INFO root donut_experiment.py:66 - batch inference time:2.0514681339263916 s +2023-10-26 15:50:44 INFO root donut_experiment.py:66 - batch inference time:1.5644984245300293 s +2023-10-26 15:50:45 INFO root donut_experiment.py:66 - batch inference time:1.8519158363342285 s +2023-10-26 15:50:47 INFO root donut_experiment.py:66 - batch inference time:1.3507211208343506 s +2023-10-26 15:50:49 INFO root donut_experiment.py:66 - batch inference time:1.770683765411377 s +2023-10-26 15:50:50 INFO root donut_experiment.py:66 - batch inference time:1.5980970859527588 s +2023-10-26 15:50:50 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:50:50 INFO root donut_experiment.py:76 - token_acc: 0.3333333333333333; edit_dis: 0.11756193555734454 +2023-10-26 15:50:56 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch13_step4000_lr5.376738e-06_avg_loss0.06044_token_acc0.33333_edit_dis0.11756.pth +2023-10-26 15:51:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4020, lr:5.273115e-06, step_mean_loss:0.0004680860147345811, average_loss:0.060156072137996223), time, (train_step_time: 0.61043s, train_average_time: 0.95223s);(grad_norm_mean: nan, grad_norm_step: 0.08882) +2023-10-26 15:51:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4040, lr:5.170141e-06, step_mean_loss:0.001293710432946682, average_loss:0.0599065060691891), time, (train_step_time: 0.62335s, train_average_time: 0.95039s);(grad_norm_mean: nan, grad_norm_step: 0.36026) +2023-10-26 15:51:31 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4060, lr:5.067831e-06, step_mean_loss:0.08942689001560211, average_loss:0.059642196014056695), time, (train_step_time: 0.54836s, train_average_time: 0.94850s);(grad_norm_mean: nan, grad_norm_step: 7.68735) +2023-10-26 15:51:42 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4080, lr:4.966199e-06, step_mean_loss:0.0009603442158550024, average_loss:0.059406849040843715), time, (train_step_time: 0.55244s, train_average_time: 0.94661s);(grad_norm_mean: nan, grad_norm_step: 0.21399) +2023-10-26 15:51:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 13, steps: 4100, lr:4.865258e-06, step_mean_loss:0.0006205111276358366, average_loss:0.05913897803637466), time, (train_step_time: 0.55952s, train_average_time: 0.94471s);(grad_norm_mean: nan, grad_norm_step: 0.08307) +2023-10-26 15:51:54 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 13, steps: 4102); +2023-10-26 15:51:58 INFO root donut_experiment.py:66 - batch inference time:3.195986270904541 s +2023-10-26 15:52:01 INFO root donut_experiment.py:66 - batch inference time:2.4219980239868164 s +2023-10-26 15:52:02 INFO root donut_experiment.py:66 - batch inference time:1.5136184692382812 s +2023-10-26 15:52:03 INFO root donut_experiment.py:66 - batch inference time:1.073047161102295 s +2023-10-26 15:52:06 INFO root donut_experiment.py:66 - batch inference time:2.09784197807312 s +2023-10-26 15:52:07 INFO root donut_experiment.py:66 - batch inference time:1.1408555507659912 s +2023-10-26 15:52:08 INFO root donut_experiment.py:66 - batch inference time:0.8390083312988281 s +2023-10-26 15:52:09 INFO root donut_experiment.py:66 - batch inference time:1.353184461593628 s +2023-10-26 15:52:10 INFO root donut_experiment.py:66 - batch inference time:1.382908582687378 s +2023-10-26 15:52:12 INFO root donut_experiment.py:66 - batch inference time:1.9670791625976562 s +2023-10-26 15:52:15 INFO root donut_experiment.py:66 - batch inference time:2.1630783081054688 s +2023-10-26 15:52:15 INFO root donut_experiment.py:66 - batch inference time:0.8667535781860352 s +2023-10-26 15:52:18 INFO root donut_experiment.py:66 - batch inference time:2.213770627975464 s +2023-10-26 15:52:19 INFO root donut_experiment.py:66 - batch inference time:1.4638795852661133 s +2023-10-26 15:52:21 INFO root donut_experiment.py:66 - batch inference time:1.8758349418640137 s +2023-10-26 15:52:23 INFO root donut_experiment.py:66 - batch inference time:1.4887688159942627 s +2023-10-26 15:52:24 INFO root donut_experiment.py:66 - batch inference time:1.765812635421753 s +2023-10-26 15:52:26 INFO root donut_experiment.py:66 - batch inference time:1.6814968585968018 s +2023-10-26 15:52:26 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:52:26 INFO root donut_experiment.py:76 - token_acc: 0.4161741835147745; edit_dis: 0.11420040601822468 +2023-10-26 15:52:32 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch13_step4102_lr4.855203e-06_avg_loss0.05911_token_acc0.41617_edit_dis0.11420.pth +2023-10-26 15:52:46 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4120, lr:4.765023e-06, step_mean_loss:0.0004045661189593375, average_loss:0.058867124857034), time, (train_step_time: 0.78689s, train_average_time: 0.94364s);(grad_norm_mean: nan, grad_norm_step: 0.05092) +2023-10-26 15:52:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4140, lr:4.665507e-06, step_mean_loss:0.0030862167477607727, average_loss:0.058637285979436814), time, (train_step_time: 0.56405s, train_average_time: 0.94206s);(grad_norm_mean: nan, grad_norm_step: 0.39337) +2023-10-26 15:53:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4160, lr:4.566725e-06, step_mean_loss:0.00037875937414355576, average_loss:0.058381263195745374), time, (train_step_time: 0.57791s, train_average_time: 0.94034s);(grad_norm_mean: nan, grad_norm_step: 0.05030) +2023-10-26 15:53:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4180, lr:4.468688e-06, step_mean_loss:0.04479603469371796, average_loss:0.05814101874550216), time, (train_step_time: 0.58521s, train_average_time: 0.93862s);(grad_norm_mean: nan, grad_norm_step: 5.96545) +2023-10-26 15:53:33 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4200, lr:4.371412e-06, step_mean_loss:6.847708573332056e-05, average_loss:0.05790188241892403), time, (train_step_time: 0.57315s, train_average_time: 0.93693s);(grad_norm_mean: nan, grad_norm_step: 0.01158) +2023-10-26 15:53:45 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4220, lr:4.274910e-06, step_mean_loss:0.000850886688567698, average_loss:0.05767811430239955), time, (train_step_time: 0.62509s, train_average_time: 0.93530s);(grad_norm_mean: nan, grad_norm_step: 0.18904) +2023-10-26 15:53:57 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4240, lr:4.179194e-06, step_mean_loss:0.0006612797151319683, average_loss:0.05743022661518585), time, (train_step_time: 0.60100s, train_average_time: 0.93361s);(grad_norm_mean: nan, grad_norm_step: 0.07779) +2023-10-26 15:54:08 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4260, lr:4.084278e-06, step_mean_loss:0.007007521577179432, average_loss:0.057177757990304064), time, (train_step_time: 0.58731s, train_average_time: 0.93194s);(grad_norm_mean: nan, grad_norm_step: 0.92479) +2023-10-26 15:54:20 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4280, lr:3.990175e-06, step_mean_loss:0.0038386103697121143, average_loss:0.05695077762590793), time, (train_step_time: 0.55888s, train_average_time: 0.93022s);(grad_norm_mean: nan, grad_norm_step: 1.76105) +2023-10-26 15:54:31 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4300, lr:3.896897e-06, step_mean_loss:0.003971411846578121, average_loss:0.05670605292876686), time, (train_step_time: 0.60632s, train_average_time: 0.92851s);(grad_norm_mean: nan, grad_norm_step: 1.44369) +2023-10-26 15:54:43 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4320, lr:3.804459e-06, step_mean_loss:0.00022311555221676826, average_loss:0.056474757725653), time, (train_step_time: 0.57139s, train_average_time: 0.92692s);(grad_norm_mean: nan, grad_norm_step: 0.04022) +2023-10-26 15:54:54 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4340, lr:3.712871e-06, step_mean_loss:0.0001715654943836853, average_loss:0.056234665536199484), time, (train_step_time: 0.59768s, train_average_time: 0.92533s);(grad_norm_mean: nan, grad_norm_step: 0.01494) +2023-10-26 15:55:06 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4360, lr:3.622148e-06, step_mean_loss:0.0006092577241361141, average_loss:0.056007627619204234), time, (train_step_time: 0.56462s, train_average_time: 0.92377s);(grad_norm_mean: nan, grad_norm_step: 0.04651) +2023-10-26 15:55:17 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 14, steps: 4380, lr:3.532301e-06, step_mean_loss:0.0002798394416458905, average_loss:0.05576302202282944), time, (train_step_time: 0.53631s, train_average_time: 0.92214s);(grad_norm_mean: nan, grad_norm_step: 0.02449) +2023-10-26 15:55:26 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 14, steps: 4395); +2023-10-26 15:55:30 INFO root donut_experiment.py:66 - batch inference time:3.383770227432251 s +2023-10-26 15:55:33 INFO root donut_experiment.py:66 - batch inference time:2.3337531089782715 s +2023-10-26 15:55:34 INFO root donut_experiment.py:66 - batch inference time:1.494964838027954 s +2023-10-26 15:55:35 INFO root donut_experiment.py:66 - batch inference time:1.082383632659912 s +2023-10-26 15:55:37 INFO root donut_experiment.py:66 - batch inference time:1.9786133766174316 s +2023-10-26 15:55:39 INFO root donut_experiment.py:66 - batch inference time:1.1285691261291504 s +2023-10-26 15:55:40 INFO root donut_experiment.py:66 - batch inference time:0.8884420394897461 s +2023-10-26 15:55:41 INFO root donut_experiment.py:66 - batch inference time:1.415724277496338 s +2023-10-26 15:55:42 INFO root donut_experiment.py:66 - batch inference time:1.4162755012512207 s +2023-10-26 15:55:44 INFO root donut_experiment.py:66 - batch inference time:2.01814603805542 s +2023-10-26 15:55:47 INFO root donut_experiment.py:66 - batch inference time:2.084537982940674 s +2023-10-26 15:55:47 INFO root donut_experiment.py:66 - batch inference time:0.7458174228668213 s +2023-10-26 15:55:49 INFO root donut_experiment.py:66 - batch inference time:1.810288667678833 s +2023-10-26 15:55:51 INFO root donut_experiment.py:66 - batch inference time:1.548619270324707 s +2023-10-26 15:55:53 INFO root donut_experiment.py:66 - batch inference time:1.8084490299224854 s +2023-10-26 15:55:54 INFO root donut_experiment.py:66 - batch inference time:1.3862314224243164 s +2023-10-26 15:55:56 INFO root donut_experiment.py:66 - batch inference time:1.842613935470581 s +2023-10-26 15:55:57 INFO root donut_experiment.py:66 - batch inference time:1.5755367279052734 s +2023-10-26 15:55:58 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:55:58 INFO root donut_experiment.py:76 - token_acc: 0.4360972568578554; edit_dis: 0.09744318047360072 +2023-10-26 15:56:03 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch14_step4395_lr3.465498e-06_avg_loss0.05558_token_acc0.43610_edit_dis0.09744.pth +2023-10-26 15:56:07 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4400, lr:3.443343e-06, step_mean_loss:0.0016250312328338623, average_loss:0.05552927742955861), time, (train_step_time: 0.54804s, train_average_time: 0.92053s);(grad_norm_mean: nan, grad_norm_step: 0.26609) +2023-10-26 15:56:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4420, lr:3.355285e-06, step_mean_loss:0.0004249938065186143, average_loss:0.05528831176963888), time, (train_step_time: 0.66178s, train_average_time: 0.91907s);(grad_norm_mean: nan, grad_norm_step: 0.03521) +2023-10-26 15:56:31 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4440, lr:3.268141e-06, step_mean_loss:0.12804292142391205, average_loss:0.05508982059665688), time, (train_step_time: 0.64649s, train_average_time: 0.91761s);(grad_norm_mean: nan, grad_norm_step: 9.57705) +2023-10-26 15:56:42 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4460, lr:3.181922e-06, step_mean_loss:7.036576425889507e-05, average_loss:0.054874799165416724), time, (train_step_time: 0.55496s, train_average_time: 0.91604s);(grad_norm_mean: nan, grad_norm_step: 0.00857) +2023-10-26 15:56:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4480, lr:3.096639e-06, step_mean_loss:0.004460926633328199, average_loss:0.05464675854784267), time, (train_step_time: 0.58800s, train_average_time: 0.91448s);(grad_norm_mean: nan, grad_norm_step: 0.44336) +2023-10-26 15:57:05 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4500, lr:3.012306e-06, step_mean_loss:0.0004304051399230957, average_loss:0.05442575777216876), time, (train_step_time: 0.54671s, train_average_time: 0.91294s);(grad_norm_mean: nan, grad_norm_step: 0.04753) +2023-10-26 15:57:16 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4520, lr:2.928932e-06, step_mean_loss:0.0017982226563617587, average_loss:0.054199835657000546), time, (train_step_time: 0.58422s, train_average_time: 0.91147s);(grad_norm_mean: nan, grad_norm_step: 0.16809) +2023-10-26 15:57:28 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4540, lr:2.846530e-06, step_mean_loss:0.00018628270481713116, average_loss:0.05400329490162602), time, (train_step_time: 0.52596s, train_average_time: 0.90995s);(grad_norm_mean: nan, grad_norm_step: 0.02055) +2023-10-26 15:57:39 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4560, lr:2.765111e-06, step_mean_loss:0.0005997464177198708, average_loss:0.05377908391610139), time, (train_step_time: 0.56343s, train_average_time: 0.90844s);(grad_norm_mean: nan, grad_norm_step: 0.11051) +2023-10-26 15:57:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4580, lr:2.684687e-06, step_mean_loss:0.00026081426767632365, average_loss:0.053551386960329156), time, (train_step_time: 0.56270s, train_average_time: 0.90703s);(grad_norm_mean: nan, grad_norm_step: 0.02594) +2023-10-26 15:58:02 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4600, lr:2.605267e-06, step_mean_loss:0.05804293975234032, average_loss:0.05333879913412207), time, (train_step_time: 0.57987s, train_average_time: 0.90559s);(grad_norm_mean: nan, grad_norm_step: 4.12680) +2023-10-26 15:58:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4620, lr:2.526864e-06, step_mean_loss:0.0013759282883256674, average_loss:0.05313716550218483), time, (train_step_time: 0.57949s, train_average_time: 0.90420s);(grad_norm_mean: nan, grad_norm_step: 0.21936) +2023-10-26 15:58:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4640, lr:2.449487e-06, step_mean_loss:0.0003610902058426291, average_loss:0.05293119886757644), time, (train_step_time: 0.57003s, train_average_time: 0.90285s);(grad_norm_mean: nan, grad_norm_step: 0.03693) +2023-10-26 15:58:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4660, lr:2.373148e-06, step_mean_loss:0.02253716252744198, average_loss:0.052713975890250675), time, (train_step_time: 0.55033s, train_average_time: 0.90143s);(grad_norm_mean: nan, grad_norm_step: 2.34544) +2023-10-26 15:58:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 15, steps: 4680, lr:2.297857e-06, step_mean_loss:0.0003327516606077552, average_loss:0.05252866357391796), time, (train_step_time: 0.62604s, train_average_time: 0.89998s);(grad_norm_mean: nan, grad_norm_step: 0.03742) +2023-10-26 15:58:53 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 15, steps: 4688); +2023-10-26 15:58:57 INFO root donut_experiment.py:66 - batch inference time:3.0159828662872314 s +2023-10-26 15:59:00 INFO root donut_experiment.py:66 - batch inference time:2.373893976211548 s +2023-10-26 15:59:01 INFO root donut_experiment.py:66 - batch inference time:1.3419201374053955 s +2023-10-26 15:59:02 INFO root donut_experiment.py:66 - batch inference time:1.009335994720459 s +2023-10-26 15:59:04 INFO root donut_experiment.py:66 - batch inference time:1.9125208854675293 s +2023-10-26 15:59:05 INFO root donut_experiment.py:66 - batch inference time:1.082789421081543 s +2023-10-26 15:59:06 INFO root donut_experiment.py:66 - batch inference time:0.8305177688598633 s +2023-10-26 15:59:08 INFO root donut_experiment.py:66 - batch inference time:1.5532481670379639 s +2023-10-26 15:59:09 INFO root donut_experiment.py:66 - batch inference time:1.3885436058044434 s +2023-10-26 15:59:11 INFO root donut_experiment.py:66 - batch inference time:2.208447217941284 s +2023-10-26 15:59:13 INFO root donut_experiment.py:66 - batch inference time:2.1617724895477295 s +2023-10-26 15:59:14 INFO root donut_experiment.py:66 - batch inference time:0.7377831935882568 s +2023-10-26 15:59:16 INFO root donut_experiment.py:66 - batch inference time:2.0356242656707764 s +2023-10-26 15:59:18 INFO root donut_experiment.py:66 - batch inference time:1.554941177368164 s +2023-10-26 15:59:20 INFO root donut_experiment.py:66 - batch inference time:1.9565179347991943 s +2023-10-26 15:59:21 INFO root donut_experiment.py:66 - batch inference time:1.4512465000152588 s +2023-10-26 15:59:23 INFO root donut_experiment.py:66 - batch inference time:1.6603384017944336 s +2023-10-26 15:59:25 INFO root donut_experiment.py:66 - batch inference time:1.5991976261138916 s +2023-10-26 15:59:25 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 15:59:25 INFO root donut_experiment.py:76 - token_acc: 0.37125935162094764; edit_dis: 0.11576043280506015 +2023-10-26 15:59:30 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch15_step4688_lr2.268037e-06_avg_loss0.05244_token_acc0.37126_edit_dis0.11576.pth +2023-10-26 15:59:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4700, lr:2.223625e-06, step_mean_loss:0.08428294956684113, average_loss:0.0523700455008409), time, (train_step_time: 0.58020s, train_average_time: 0.89860s);(grad_norm_mean: nan, grad_norm_step: 3.55582) +2023-10-26 15:59:50 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4720, lr:2.150461e-06, step_mean_loss:6.039971049176529e-05, average_loss:0.052155434377988766), time, (train_step_time: 0.66436s, train_average_time: 0.89723s);(grad_norm_mean: nan, grad_norm_step: 0.00608) +2023-10-26 16:00:01 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4740, lr:2.078375e-06, step_mean_loss:0.01642862893640995, average_loss:0.05198067552895898), time, (train_step_time: 0.56802s, train_average_time: 0.89583s);(grad_norm_mean: nan, grad_norm_step: 0.85282) +2023-10-26 16:00:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4760, lr:2.007378e-06, step_mean_loss:0.03868900239467621, average_loss:0.05178108415350857), time, (train_step_time: 0.56755s, train_average_time: 0.89447s);(grad_norm_mean: nan, grad_norm_step: 3.81494) +2023-10-26 16:00:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4780, lr:1.937480e-06, step_mean_loss:0.002308026421815157, average_loss:0.051573704854690046), time, (train_step_time: 0.56222s, train_average_time: 0.89310s);(grad_norm_mean: nan, grad_norm_step: 0.43159) +2023-10-26 16:00:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4800, lr:1.868689e-06, step_mean_loss:5.9913505538133904e-05, average_loss:0.05136477782566847), time, (train_step_time: 0.63693s, train_average_time: 0.89178s);(grad_norm_mean: nan, grad_norm_step: 0.00736) +2023-10-26 16:00:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4820, lr:1.801016e-06, step_mean_loss:0.004170514177531004, average_loss:0.05115658986388436), time, (train_step_time: 0.56486s, train_average_time: 0.89042s);(grad_norm_mean: nan, grad_norm_step: 0.70919) +2023-10-26 16:00:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4840, lr:1.734469e-06, step_mean_loss:0.0003453748649917543, average_loss:0.05096050321877436), time, (train_step_time: 0.53216s, train_average_time: 0.88917s);(grad_norm_mean: nan, grad_norm_step: 0.03411) +2023-10-26 16:01:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4860, lr:1.669058e-06, step_mean_loss:0.00020685385970864445, average_loss:0.05076035856210883), time, (train_step_time: 0.67087s, train_average_time: 0.88808s);(grad_norm_mean: nan, grad_norm_step: 0.01228) +2023-10-26 16:01:23 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4880, lr:1.604792e-06, step_mean_loss:0.00019236179650761187, average_loss:0.050566512862328786), time, (train_step_time: 0.64359s, train_average_time: 0.88699s);(grad_norm_mean: nan, grad_norm_step: 0.01263) +2023-10-26 16:01:35 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4900, lr:1.541679e-06, step_mean_loss:0.0018892651423811913, average_loss:0.050364623522454334), time, (train_step_time: 0.57208s, train_average_time: 0.88573s);(grad_norm_mean: nan, grad_norm_step: 0.26662) +2023-10-26 16:01:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4920, lr:1.479729e-06, step_mean_loss:0.0006053504766896367, average_loss:0.050177168374940025), time, (train_step_time: 0.59640s, train_average_time: 0.88453s);(grad_norm_mean: nan, grad_norm_step: 0.05968) +2023-10-26 16:01:58 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4940, lr:1.418950e-06, step_mean_loss:0.000238781824009493, average_loss:0.05001083712555548), time, (train_step_time: 0.58141s, train_average_time: 0.88331s);(grad_norm_mean: nan, grad_norm_step: 0.02176) +2023-10-26 16:02:10 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4960, lr:1.359349e-06, step_mean_loss:9.388692706124857e-05, average_loss:0.049814016898774446), time, (train_step_time: 0.57440s, train_average_time: 0.88207s);(grad_norm_mean: nan, grad_norm_step: 0.01202) +2023-10-26 16:02:22 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 16, steps: 4980, lr:1.300936e-06, step_mean_loss:0.0004186455626040697, average_loss:0.049643153374577066), time, (train_step_time: 0.60902s, train_average_time: 0.88097s);(grad_norm_mean: nan, grad_norm_step: 0.05701) +2023-10-26 16:02:23 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 16, steps: 4981); +2023-10-26 16:02:27 INFO root donut_experiment.py:66 - batch inference time:3.053123712539673 s +2023-10-26 16:02:29 INFO root donut_experiment.py:66 - batch inference time:2.521077871322632 s +2023-10-26 16:02:31 INFO root donut_experiment.py:66 - batch inference time:1.4223887920379639 s +2023-10-26 16:02:32 INFO root donut_experiment.py:66 - batch inference time:1.003746747970581 s +2023-10-26 16:02:34 INFO root donut_experiment.py:66 - batch inference time:1.946831226348877 s +2023-10-26 16:02:35 INFO root donut_experiment.py:66 - batch inference time:1.107881784439087 s +2023-10-26 16:02:36 INFO root donut_experiment.py:66 - batch inference time:0.9007604122161865 s +2023-10-26 16:02:37 INFO root donut_experiment.py:66 - batch inference time:1.382383108139038 s +2023-10-26 16:02:39 INFO root donut_experiment.py:66 - batch inference time:1.4116196632385254 s +2023-10-26 16:02:41 INFO root donut_experiment.py:66 - batch inference time:1.9993371963500977 s +2023-10-26 16:02:43 INFO root donut_experiment.py:66 - batch inference time:1.9707341194152832 s +2023-10-26 16:02:44 INFO root donut_experiment.py:66 - batch inference time:0.7375314235687256 s +2023-10-26 16:02:45 INFO root donut_experiment.py:66 - batch inference time:1.8962621688842773 s +2023-10-26 16:02:47 INFO root donut_experiment.py:66 - batch inference time:1.6078548431396484 s +2023-10-26 16:02:49 INFO root donut_experiment.py:66 - batch inference time:1.9732625484466553 s +2023-10-26 16:02:51 INFO root donut_experiment.py:66 - batch inference time:1.537980318069458 s +2023-10-26 16:02:53 INFO root donut_experiment.py:66 - batch inference time:1.8641488552093506 s +2023-10-26 16:02:54 INFO root donut_experiment.py:66 - batch inference time:1.776634931564331 s +2023-10-26 16:02:55 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 16:02:55 INFO root donut_experiment.py:76 - token_acc: 0.4547454431175361; edit_dis: 0.0915246352465229 +2023-10-26 16:03:00 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch16_step4981_lr1.298047e-06_avg_loss0.04963_token_acc0.45475_edit_dis0.09152.pth +2023-10-26 16:03:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5000, lr:1.243719e-06, step_mean_loss:0.00019666978914756328, average_loss:0.049451290651069214), time, (train_step_time: 0.61321s, train_average_time: 0.87972s);(grad_norm_mean: nan, grad_norm_step: 0.02118) +2023-10-26 16:03:12 INFO root donut_experiment.py:251 - experiment:nougat-base; eval, (epoch: 17, steps: 5000); +2023-10-26 16:03:16 INFO root donut_experiment.py:66 - batch inference time:3.175837278366089 s +2023-10-26 16:03:18 INFO root donut_experiment.py:66 - batch inference time:2.3557019233703613 s +2023-10-26 16:03:20 INFO root donut_experiment.py:66 - batch inference time:1.475008249282837 s +2023-10-26 16:03:21 INFO root donut_experiment.py:66 - batch inference time:1.1538236141204834 s +2023-10-26 16:03:23 INFO root donut_experiment.py:66 - batch inference time:1.9127635955810547 s +2023-10-26 16:03:24 INFO root donut_experiment.py:66 - batch inference time:1.0984361171722412 s +2023-10-26 16:03:25 INFO root donut_experiment.py:66 - batch inference time:0.8963327407836914 s +2023-10-26 16:03:26 INFO root donut_experiment.py:66 - batch inference time:1.399388313293457 s +2023-10-26 16:03:28 INFO root donut_experiment.py:66 - batch inference time:1.478752613067627 s +2023-10-26 16:03:30 INFO root donut_experiment.py:66 - batch inference time:2.023552179336548 s +2023-10-26 16:03:32 INFO root donut_experiment.py:66 - batch inference time:2.221897602081299 s +2023-10-26 16:03:33 INFO root donut_experiment.py:66 - batch inference time:0.6823761463165283 s +2023-10-26 16:03:35 INFO root donut_experiment.py:66 - batch inference time:2.12558650970459 s +2023-10-26 16:03:37 INFO root donut_experiment.py:66 - batch inference time:1.5740556716918945 s +2023-10-26 16:03:39 INFO root donut_experiment.py:66 - batch inference time:1.8834617137908936 s +2023-10-26 16:03:40 INFO root donut_experiment.py:66 - batch inference time:1.5203723907470703 s +2023-10-26 16:03:42 INFO root donut_experiment.py:66 - batch inference time:1.8503382205963135 s +2023-10-26 16:03:44 INFO root donut_experiment.py:66 - batch inference time:1.6498236656188965 s +2023-10-26 16:03:44 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 16:03:44 INFO root donut_experiment.py:76 - token_acc: 0.42023103340618173; edit_dis: 0.11584386552001726 +2023-10-26 16:03:49 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch17_step5000_lr1.243719e-06_avg_loss0.04945_token_acc0.42023_edit_dis0.11584.pth +2023-10-26 16:04:01 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5020, lr:1.187704e-06, step_mean_loss:0.00018777939840219915, average_loss:0.04928605947071479), time, (train_step_time: 0.64909s, train_average_time: 0.87860s);(grad_norm_mean: nan, grad_norm_step: 0.03458) +2023-10-26 16:04:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5040, lr:1.132901e-06, step_mean_loss:0.006266637239605188, average_loss:0.04909707704521441), time, (train_step_time: 0.73774s, train_average_time: 0.87753s);(grad_norm_mean: nan, grad_norm_step: 0.36588) +2023-10-26 16:04:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5060, lr:1.079316e-06, step_mean_loss:0.007664592005312443, average_loss:0.04891260542585895), time, (train_step_time: 0.55382s, train_average_time: 0.87642s);(grad_norm_mean: nan, grad_norm_step: 1.51163) +2023-10-26 16:04:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5080, lr:1.026956e-06, step_mean_loss:0.0001524475374026224, average_loss:0.04874052288283443), time, (train_step_time: 0.59920s, train_average_time: 0.87551s);(grad_norm_mean: nan, grad_norm_step: 0.02070) +2023-10-26 16:04:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5100, lr:9.758300e-07, step_mean_loss:0.0059367697685956955, average_loss:0.04856841798296991), time, (train_step_time: 0.67180s, train_average_time: 0.87452s);(grad_norm_mean: nan, grad_norm_step: 0.59950) +2023-10-26 16:05:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5120, lr:9.259438e-07, step_mean_loss:0.00015591882402077317, average_loss:0.048411174683382185), time, (train_step_time: 0.55790s, train_average_time: 0.87352s);(grad_norm_mean: nan, grad_norm_step: 0.01618) +2023-10-26 16:05:16 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5140, lr:8.773045e-07, step_mean_loss:7.362308679148555e-05, average_loss:0.04823995979683542), time, (train_step_time: 0.62193s, train_average_time: 0.87257s);(grad_norm_mean: nan, grad_norm_step: 0.00931) +2023-10-26 16:05:28 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5160, lr:8.299187e-07, step_mean_loss:0.00028505176305770874, average_loss:0.04805559285585847), time, (train_step_time: 0.58933s, train_average_time: 0.87162s);(grad_norm_mean: nan, grad_norm_step: 0.04054) +2023-10-26 16:05:40 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5180, lr:7.837930e-07, step_mean_loss:0.005012820474803448, average_loss:0.04788072578176035), time, (train_step_time: 0.55996s, train_average_time: 0.87046s);(grad_norm_mean: nan, grad_norm_step: 0.79918) +2023-10-26 16:05:51 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5200, lr:7.389337e-07, step_mean_loss:0.0042486609891057014, average_loss:0.047701029668234626), time, (train_step_time: 0.57356s, train_average_time: 0.86928s);(grad_norm_mean: nan, grad_norm_step: 0.64535) +2023-10-26 16:06:03 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5220, lr:6.953470e-07, step_mean_loss:0.0002977504045702517, average_loss:0.04753415202393037), time, (train_step_time: 0.54589s, train_average_time: 0.86817s);(grad_norm_mean: nan, grad_norm_step: 0.04339) +2023-10-26 16:06:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5240, lr:6.530389e-07, step_mean_loss:0.00012419982522260398, average_loss:0.047366628475227476), time, (train_step_time: 0.61256s, train_average_time: 0.86708s);(grad_norm_mean: nan, grad_norm_step: 0.04743) +2023-10-26 16:06:26 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 17, steps: 5260, lr:6.120152e-07, step_mean_loss:0.0003291910979896784, average_loss:0.04719217500140995), time, (train_step_time: 0.56329s, train_average_time: 0.86592s);(grad_norm_mean: nan, grad_norm_step: 0.03173) +2023-10-26 16:06:34 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 17, steps: 5274); +2023-10-26 16:06:37 INFO root donut_experiment.py:66 - batch inference time:3.0169384479522705 s +2023-10-26 16:06:40 INFO root donut_experiment.py:66 - batch inference time:2.4826500415802 s +2023-10-26 16:06:41 INFO root donut_experiment.py:66 - batch inference time:1.3976850509643555 s +2023-10-26 16:06:42 INFO root donut_experiment.py:66 - batch inference time:0.9705126285552979 s +2023-10-26 16:06:44 INFO root donut_experiment.py:66 - batch inference time:1.880204439163208 s +2023-10-26 16:06:45 INFO root donut_experiment.py:66 - batch inference time:1.080430269241333 s +2023-10-26 16:06:46 INFO root donut_experiment.py:66 - batch inference time:0.8579397201538086 s +2023-10-26 16:06:48 INFO root donut_experiment.py:66 - batch inference time:1.368546962738037 s +2023-10-26 16:06:49 INFO root donut_experiment.py:66 - batch inference time:1.3737316131591797 s +2023-10-26 16:06:51 INFO root donut_experiment.py:66 - batch inference time:1.9842174053192139 s +2023-10-26 16:06:53 INFO root donut_experiment.py:66 - batch inference time:1.6880145072937012 s +2023-10-26 16:06:54 INFO root donut_experiment.py:66 - batch inference time:0.76236891746521 s +2023-10-26 16:06:56 INFO root donut_experiment.py:66 - batch inference time:1.9156920909881592 s +2023-10-26 16:06:57 INFO root donut_experiment.py:66 - batch inference time:1.4899721145629883 s +2023-10-26 16:06:59 INFO root donut_experiment.py:66 - batch inference time:1.8978605270385742 s +2023-10-26 16:07:00 INFO root donut_experiment.py:66 - batch inference time:1.3870439529418945 s +2023-10-26 16:07:02 INFO root donut_experiment.py:66 - batch inference time:1.7836589813232422 s +2023-10-26 16:07:04 INFO root donut_experiment.py:66 - batch inference time:1.6317565441131592 s +2023-10-26 16:07:04 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 16:07:04 INFO root donut_experiment.py:76 - token_acc: 0.41191222570532915; edit_dis: 0.10026370230698543 +2023-10-26 16:07:10 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch17_step5274_lr5.840658e-07_avg_loss0.04708_token_acc0.41191_edit_dis0.10026.pth +2023-10-26 16:07:14 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5280, lr:5.722815e-07, step_mean_loss:0.001258528558537364, average_loss:0.047023400140722746), time, (train_step_time: 0.55854s, train_average_time: 0.86481s);(grad_norm_mean: nan, grad_norm_step: 0.10953) +2023-10-26 16:07:25 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5300, lr:5.338432e-07, step_mean_loss:0.0034795990213751793, average_loss:0.04685405962891584), time, (train_step_time: 0.58086s, train_average_time: 0.86370s);(grad_norm_mean: nan, grad_norm_step: 0.59962) +2023-10-26 16:07:37 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5320, lr:4.967058e-07, step_mean_loss:0.00012439489364624023, average_loss:0.04669564551323004), time, (train_step_time: 0.56900s, train_average_time: 0.86269s);(grad_norm_mean: nan, grad_norm_step: 0.01459) +2023-10-26 16:07:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5340, lr:4.608742e-07, step_mean_loss:0.004795086570084095, average_loss:0.04652719865725749), time, (train_step_time: 0.53894s, train_average_time: 0.86167s);(grad_norm_mean: nan, grad_norm_step: 0.96730) +2023-10-26 16:08:01 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5360, lr:4.263534e-07, step_mean_loss:0.00025537912733852863, average_loss:0.046369248357093135), time, (train_step_time: 0.55415s, train_average_time: 0.86060s);(grad_norm_mean: nan, grad_norm_step: 0.02944) +2023-10-26 16:08:13 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5380, lr:3.931481e-07, step_mean_loss:0.0009343654965050519, average_loss:0.0462258308917768), time, (train_step_time: 0.61734s, train_average_time: 0.85962s);(grad_norm_mean: nan, grad_norm_step: 0.20090) +2023-10-26 16:08:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5400, lr:3.612630e-07, step_mean_loss:0.001508417190052569, average_loss:0.04605765295583633), time, (train_step_time: 0.60039s, train_average_time: 0.85857s);(grad_norm_mean: nan, grad_norm_step: 0.58000) +2023-10-26 16:08:36 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5420, lr:3.307023e-07, step_mean_loss:0.0001525858388049528, average_loss:0.045899292453545244), time, (train_step_time: 0.62642s, train_average_time: 0.85753s);(grad_norm_mean: nan, grad_norm_step: 0.01570) +2023-10-26 16:08:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5440, lr:3.014703e-07, step_mean_loss:0.0005237623699940741, average_loss:0.045736932917498384), time, (train_step_time: 0.58333s, train_average_time: 0.85649s);(grad_norm_mean: nan, grad_norm_step: 0.06349) +2023-10-26 16:08:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5460, lr:2.735709e-07, step_mean_loss:0.0003920557501260191, average_loss:0.04558344181084463), time, (train_step_time: 0.59514s, train_average_time: 0.85559s);(grad_norm_mean: nan, grad_norm_step: 0.05613) +2023-10-26 16:09:12 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5480, lr:2.470082e-07, step_mean_loss:0.0008000570815056562, average_loss:0.045430484031660445), time, (train_step_time: 0.65299s, train_average_time: 0.85475s);(grad_norm_mean: nan, grad_norm_step: 0.11059) +2023-10-26 16:09:24 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5500, lr:2.217856e-07, step_mean_loss:8.926187001634389e-05, average_loss:0.045280524946422124), time, (train_step_time: 0.55698s, train_average_time: 0.85384s);(grad_norm_mean: nan, grad_norm_step: 0.00523) +2023-10-26 16:09:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5520, lr:1.979066e-07, step_mean_loss:0.012823639437556267, average_loss:0.04512733625579757), time, (train_step_time: 1.54223s, train_average_time: 0.85429s);(grad_norm_mean: nan, grad_norm_step: 1.72216) +2023-10-26 16:10:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5540, lr:1.753746e-07, step_mean_loss:0.00027815773501060903, average_loss:0.04498855805137341), time, (train_step_time: 1.24301s, train_average_time: 0.85609s);(grad_norm_mean: nan, grad_norm_step: 0.02565) +2023-10-26 16:10:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 18, steps: 5560, lr:1.541926e-07, step_mean_loss:0.000916697783395648, average_loss:0.04484030589482991), time, (train_step_time: 1.48988s, train_average_time: 0.85790s);(grad_norm_mean: nan, grad_norm_step: 0.09672) +2023-10-26 16:10:47 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 18, steps: 5567); +2023-10-26 16:10:52 INFO root donut_experiment.py:66 - batch inference time:3.5834922790527344 s +2023-10-26 16:10:55 INFO root donut_experiment.py:66 - batch inference time:2.7497732639312744 s +2023-10-26 16:10:57 INFO root donut_experiment.py:66 - batch inference time:1.7479853630065918 s +2023-10-26 16:10:58 INFO root donut_experiment.py:66 - batch inference time:1.2331335544586182 s +2023-10-26 16:11:00 INFO root donut_experiment.py:66 - batch inference time:2.2248592376708984 s +2023-10-26 16:11:01 INFO root donut_experiment.py:66 - batch inference time:1.304853916168213 s +2023-10-26 16:11:02 INFO root donut_experiment.py:66 - batch inference time:1.0111746788024902 s +2023-10-26 16:11:04 INFO root donut_experiment.py:66 - batch inference time:1.515228271484375 s +2023-10-26 16:11:06 INFO root donut_experiment.py:66 - batch inference time:1.6637616157531738 s +2023-10-26 16:11:08 INFO root donut_experiment.py:66 - batch inference time:2.3108251094818115 s +2023-10-26 16:11:10 INFO root donut_experiment.py:66 - batch inference time:2.3366594314575195 s +2023-10-26 16:11:11 INFO root donut_experiment.py:66 - batch inference time:0.8939123153686523 s +2023-10-26 16:11:14 INFO root donut_experiment.py:66 - batch inference time:2.485607147216797 s +2023-10-26 16:11:16 INFO root donut_experiment.py:66 - batch inference time:1.8932232856750488 s +2023-10-26 16:11:18 INFO root donut_experiment.py:66 - batch inference time:2.3124492168426514 s +2023-10-26 16:11:20 INFO root donut_experiment.py:66 - batch inference time:1.7065961360931396 s +2023-10-26 16:11:22 INFO root donut_experiment.py:66 - batch inference time:2.069149971008301 s +2023-10-26 16:11:24 INFO root donut_experiment.py:66 - batch inference time:1.9346613883972168 s +2023-10-26 16:11:24 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 16:11:24 INFO root donut_experiment.py:76 - token_acc: 0.4276315789473684; edit_dis: 0.10590775131259784 +2023-10-26 16:11:30 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch18_step5567_lr1.470983e-07_avg_loss0.04479_token_acc0.42763_edit_dis0.10591.pth +2023-10-26 16:11:49 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5580, lr:1.343635e-07, step_mean_loss:0.0011475927894935012, average_loss:0.04468301750709109), time, (train_step_time: 1.15485s, train_average_time: 0.85968s);(grad_norm_mean: nan, grad_norm_step: 0.31987) +2023-10-26 16:12:16 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5600, lr:1.158901e-07, step_mean_loss:0.011820200830698013, average_loss:0.04453390975744534), time, (train_step_time: 1.00709s, train_average_time: 0.86158s);(grad_norm_mean: nan, grad_norm_step: 0.42883) +2023-10-26 16:12:44 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5620, lr:9.877485e-08, step_mean_loss:0.0005703243077732623, average_loss:0.044406736660059384), time, (train_step_time: 1.16930s, train_average_time: 0.86336s);(grad_norm_mean: nan, grad_norm_step: 0.06030) +2023-10-26 16:13:11 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5640, lr:8.302018e-08, step_mean_loss:0.0008318964391946793, average_loss:0.04426237499610418), time, (train_step_time: 1.42645s, train_average_time: 0.86514s);(grad_norm_mean: nan, grad_norm_step: 0.12060) +2023-10-26 16:13:38 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5660, lr:6.862823e-08, step_mean_loss:0.000861323089338839, average_loss:0.04411807144573487), time, (train_step_time: 1.17284s, train_average_time: 0.86675s);(grad_norm_mean: nan, grad_norm_step: 0.10135) +2023-10-26 16:14:05 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5680, lr:5.560096e-08, step_mean_loss:0.00010034441947937012, average_loss:0.04396945190764437), time, (train_step_time: 1.45537s, train_average_time: 0.86849s);(grad_norm_mean: nan, grad_norm_step: 0.00926) +2023-10-26 16:14:32 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5700, lr:4.394018e-08, step_mean_loss:0.00029783538775518537, average_loss:0.04381750771479474), time, (train_step_time: 1.35733s, train_average_time: 0.87023s);(grad_norm_mean: nan, grad_norm_step: 0.03528) +2023-10-26 16:14:59 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5720, lr:3.364748e-08, step_mean_loss:0.0007392571424134076, average_loss:0.043672904601906884), time, (train_step_time: 0.93670s, train_average_time: 0.87190s);(grad_norm_mean: nan, grad_norm_step: 0.10397) +2023-10-26 16:15:25 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5740, lr:2.472428e-08, step_mean_loss:0.0007395820575766265, average_loss:0.043555969544961506), time, (train_step_time: 1.26795s, train_average_time: 0.87348s);(grad_norm_mean: nan, grad_norm_step: 0.10644) +2023-10-26 16:15:53 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5760, lr:1.717180e-08, step_mean_loss:0.0021148608066141605, average_loss:0.04341282250004023), time, (train_step_time: 1.47900s, train_average_time: 0.87514s);(grad_norm_mean: nan, grad_norm_step: 0.48545) +2023-10-26 16:16:19 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5780, lr:1.099109e-08, step_mean_loss:0.00042649611714296043, average_loss:0.04327353600322356), time, (train_step_time: 1.45553s, train_average_time: 0.87676s);(grad_norm_mean: nan, grad_norm_step: 0.06219) +2023-10-26 16:16:47 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5800, lr:6.182981e-09, step_mean_loss:0.0005311911227181554, average_loss:0.043173101326631165), time, (train_step_time: 1.54228s, train_average_time: 0.87841s);(grad_norm_mean: nan, grad_norm_step: 0.05936) +2023-10-26 16:17:15 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5820, lr:2.748149e-09, step_mean_loss:0.0002601183077786118, average_loss:0.04303075737200952), time, (train_step_time: 1.12367s, train_average_time: 0.88022s);(grad_norm_mean: nan, grad_norm_step: 0.02425) +2023-10-26 16:17:43 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5840, lr:6.870608e-10, step_mean_loss:0.00574709614738822, average_loss:0.042892935802141846), time, (train_step_time: 1.50849s, train_average_time: 0.88204s);(grad_norm_mean: nan, grad_norm_step: 0.88605) +2023-10-26 16:18:09 INFO root donut_experiment.py:243 - experiment:nougat-base; train, (epoch: 19, steps: 5860, lr:0.000000e+00, step_mean_loss:0.0004027895338367671, average_loss:0.04275108012854163), time, (train_step_time: 1.50261s, train_average_time: 0.88348s);(grad_norm_mean: nan, grad_norm_step: 0.05125) +2023-10-26 16:18:09 INFO root donut_experiment.py:264 - experiment:nougat-base; eval, (epoch: 19, steps: 5860); +2023-10-26 16:18:13 INFO root donut_experiment.py:66 - batch inference time:3.378760814666748 s +2023-10-26 16:18:16 INFO root donut_experiment.py:66 - batch inference time:2.676053762435913 s +2023-10-26 16:18:18 INFO root donut_experiment.py:66 - batch inference time:1.7379047870635986 s +2023-10-26 16:18:19 INFO root donut_experiment.py:66 - batch inference time:1.2310738563537598 s +2023-10-26 16:18:21 INFO root donut_experiment.py:66 - batch inference time:2.220395803451538 s +2023-10-26 16:18:23 INFO root donut_experiment.py:66 - batch inference time:1.4803249835968018 s +2023-10-26 16:18:24 INFO root donut_experiment.py:66 - batch inference time:1.069157361984253 s +2023-10-26 16:18:26 INFO root donut_experiment.py:66 - batch inference time:1.5836141109466553 s +2023-10-26 16:18:27 INFO root donut_experiment.py:66 - batch inference time:1.7490077018737793 s +2023-10-26 16:18:30 INFO root donut_experiment.py:66 - batch inference time:2.346395969390869 s +2023-10-26 16:18:32 INFO root donut_experiment.py:66 - batch inference time:2.11633563041687 s +2023-10-26 16:18:33 INFO root donut_experiment.py:66 - batch inference time:0.8913905620574951 s +2023-10-26 16:18:35 INFO root donut_experiment.py:66 - batch inference time:2.43469500541687 s +2023-10-26 16:18:37 INFO root donut_experiment.py:66 - batch inference time:1.7651002407073975 s +2023-10-26 16:18:39 INFO root donut_experiment.py:66 - batch inference time:2.1310105323791504 s +2023-10-26 16:18:41 INFO root donut_experiment.py:66 - batch inference time:1.780095100402832 s +2023-10-26 16:18:43 INFO root donut_experiment.py:66 - batch inference time:1.92588210105896 s +2023-10-26 16:18:45 INFO root donut_experiment.py:66 - batch inference time:1.881756067276001 s +2023-10-26 16:18:45 INFO root donut_experiment.py:75 - evaluating... +2023-10-26 16:18:45 INFO root donut_experiment.py:76 - token_acc: 0.40559220860823125; edit_dis: 0.1118061539545454 +2023-10-26 16:18:51 INFO root base_experiment.py:340 - model successfully saved to /home/zhujun/Code/nougat-latex-ocr/models/nougat-base_epoch19_step5860_lr0.000000e+00_avg_loss0.04275_token_acc0.40559_edit_dis0.11181.pth