==================================================================================================== import os import sys with open(sys.argv[0]) as f: code = f.read() # read the code of this file ASAP, for logging import uuid import glob import time from dataclasses import dataclass import numpy as np import torch from torch import nn import torch.nn.functional as F import torch.distributed as dist import torch._inductor.config as config from torch.nn.parallel import DistributedDataParallel as DDP # ----------------------------------------------------------------------------- # Muon optimizer def zeropower_via_svd(G, steps=None): U, S, V = G.svd() return U @ V.T @torch.compile def zeropower_via_newtonschulz5(G, steps=10, eps=1e-7): """ Newton-Schulz iteration to compute the zeroth power / orthogonalization of G. We opt to use a quintic iteration whose coefficients are selected to maximize the slope at zero. For the purpose of minimizing steps, it turns out to be empirically effective to keep increasing the slope at zero even beyond the point where the iteration no longer converges all the way to one everywhere on the interval. This iteration therefore does not produce UV^T but rather something like US'V^T where S' is diagonal with S_{ii}' \sim Uniform(0.5, 1.5), which turns out not to hurt model performance at all relative to UV^T, where USV^T = G is the SVD. """ assert len(G.shape) == 2 a, b, c = (3.4445, -4.7750, 2.0315) X = G.bfloat16() X /= (X.norm() + eps) # ensure top singular value <= 1 if G.size(0) > G.size(1): X = X.T for _ in range(steps): A = X @ X.T B = A @ X X = a * X + b * B + c * A @ B if G.size(0) > G.size(1): X = X.T return X zeropower_backends = dict(svd=zeropower_via_svd, newtonschulz5=zeropower_via_newtonschulz5) class Muon(torch.optim.Optimizer): """ Muon - MomentUm Orthogonalized by Newton-schulz Muon internally runs standard SGD-momentum, and then performs an orthogonalization post- processing step, in which each 2D parameter's update is replaced with the nearest orthogonal matrix. To efficiently orthogonalize each update, we use a Newton-Schulz iteration, which has the advantage that it can be stably run in bfloat16 on the GPU. Some warnings: - This optimizer assumes that all parameters passed in are 2D. - It should not be used for the embedding layer, the final fully connected layer, or any {0,1}-D parameters; those should all be optimized by a standard method (e.g., AdamW). - To use it with 4D convolutional filters, it works well to just flatten their last 3 dimensions. - We believe it is unlikely to work well for training with small batch size. - We believe it may not work well for finetuning pretrained models, but we haven't tested this. - We have not yet tried this optimizer for training scenarios larger than NanoGPT (124M). Arguments: lr: The learning rate used by the internal SGD. momentum: The momentum used by the internal SGD. nesterov: Whether to use Nesterov-style momentum in the internal SGD. (recommended) backend: The chosen backend for the orthogonalization step. (recommended: 'newtonschulz5') backend_steps: The number of iteration steps to use in the backend, if it is iterative. """ def __init__(self, params, lr=0.02, momentum=0.95, nesterov=True, backend='newtonschulz5', backend_steps=5): defaults = dict(lr=lr, momentum=momentum, nesterov=nesterov, backend=backend, backend_steps=backend_steps) super().__init__(params, defaults) def step(self): for group in self.param_groups: lr = group['lr'] momentum = group['momentum'] zeropower_backend = zeropower_backends[group['backend']] # generate weight updates in distributed fashion total_params = sum(p.numel() for p in group['params']) updates_flat = torch.zeros(total_params, device='cuda', dtype=torch.bfloat16) curr_idx = 0 for i, p in enumerate(group['params']): # luckily this will perfectly distribute a transformer with multiple of 4 layers to 8 GPUs if i % int(os.environ['WORLD_SIZE']) == int(os.environ['RANK']): g = p.grad assert g is not None state = self.state[p] if 'momentum_buffer' not in state: state['momentum_buffer'] = torch.zeros_like(g) buf = state['momentum_buffer'] buf.mul_(momentum).add_(g) if group['nesterov']: g = g.add(buf, alpha=momentum) g = zeropower_backend(g, steps=group['backend_steps']) g *= max(1, g.size(0)/g.size(1))**0.5 updates_flat[curr_idx:curr_idx+p.numel()] = g.flatten() curr_idx += p.numel() # sync updates across devices. we are not memory-constrained so can do this simple deserialization dist.all_reduce(updates_flat, op=dist.ReduceOp.SUM) # deserialize and apply updates curr_idx = 0 for p in group['params']: g = updates_flat[curr_idx:curr_idx+p.numel()].view_as(p.data).type_as(p.data) p.data.add_(g, alpha=-lr) curr_idx += p.numel() # ----------------------------------------------------------------------------- # PyTorch nn.Module definitions for the GPT-2 model class Rotary(torch.nn.Module): def __init__(self, dim, base=10000): super().__init__() self.inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim)) self.seq_len_cached = None self.cos_cached = None self.sin_cached = None def forward(self, x): seq_len = x.shape[1] if seq_len != self.seq_len_cached: self.seq_len_cached = seq_len t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq) freqs = torch.outer(t, self.inv_freq).to(x.device) self.cos_cached = freqs.cos().bfloat16() self.sin_cached = freqs.sin().bfloat16() return self.cos_cached[None, :, None, :], self.sin_cached[None, :, None, :] def apply_rotary_emb(x, cos, sin): assert x.ndim == 4 # multihead attention d = x.shape[3]//2 x1 = x[..., :d] x2 = x[..., d:] y1 = x1 * cos + x2 * sin y2 = x1 * (-sin) + x2 * cos return torch.cat([y1, y2], 3).type_as(x) class CastedLinear(nn.Linear): def forward(self, x): return F.linear(x, self.weight.to(x.dtype)) class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() self.n_head = config.n_head self.n_embd = config.n_embd self.head_dim = self.n_embd // self.n_head assert self.n_embd % self.n_head == 0 self.c_q = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_k = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_v = CastedLinear(self.n_embd, self.n_embd, bias=False) # output projection self.c_proj = CastedLinear(self.n_embd, self.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 self.rotary = Rotary(self.head_dim) self.lamb = nn.Parameter(torch.tensor(0.5)) # @Grad62304977 def forward(self, x, v1=None): B, T, C = x.size() # batch size, sequence length, embedding dimensionality (n_embd) q = self.c_q(x).view(B, T, self.n_head, self.head_dim) k = self.c_k(x).view(B, T, self.n_head, self.head_dim) v = self.c_v(x).view(B, T, self.n_head, self.head_dim) if v1 is None: v1 = v # This happens if we are in the first block. v needs to be accessed by subsequent blocks v = (1 - self.lamb) * v + self.lamb * v1.view_as(v) # @Grad62304977 cos, sin = self.rotary(q) q, k = F.rms_norm(q, (q.size(-1),)), F.rms_norm(k, (k.size(-1),)) # QK norm suggested by @Grad62304977 q, k = apply_rotary_emb(q, cos, sin), apply_rotary_emb(k, cos, sin) y = F.scaled_dot_product_attention(q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), is_causal=True) y = y.transpose(1, 2).contiguous().view_as(x) # re-assemble all head outputs side by side y = self.c_proj(y) return y, v1 class MLP(nn.Module): def __init__(self, config): super().__init__() self.c_fc = CastedLinear(config.n_embd, 4 * config.n_embd, bias=False) self.c_proj = CastedLinear(4 * config.n_embd, config.n_embd, bias=False) self.c_proj.weight.data.zero_() # zero init suggested by @Grad62304977 def forward(self, x): x = self.c_fc(x) x = F.relu(x).square() # https://arxiv.org/abs/2109.08668v2; ~1-2% better than GELU; suggested by @SKYLINEZ007 and @Grad62304977 x = self.c_proj(x) return x class Block(nn.Module): def __init__(self, config): super().__init__() self.attn = CausalSelfAttention(config) self.mlp = MLP(config) self.lambdas = nn.Parameter(torch.tensor([1., 0.])) def forward(self, x, v1, x0): x = self.lambdas[0] * x + self.lambdas[1] * x0 x1, v1 = self.attn(F.rms_norm(x, (x.size(-1),)), v1) x = x + x1 x = x + self.mlp(F.rms_norm(x, (x.size(-1),))) return x, v1 # ----------------------------------------------------------------------------- # The main GPT-2 model @dataclass class GPTConfig: vocab_size : int = 50304 n_layer : int = 12 n_head : int = 6 # head dim 128 suggested by @Grad62304977 n_embd : int = 768 class GPT(nn.Module): def __init__(self, config): super().__init__() self.config = config self.transformer = nn.ModuleDict(dict( wte = nn.Embedding(config.vocab_size, config.n_embd), h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]), )) self.lm_head = CastedLinear(config.n_embd, config.vocab_size, bias=False) self.lm_head.weight.data.zero_() # @Grad62304977 def forward(self, idx, target): # forward the GPT model itself x = self.transformer.wte(idx) # token embeddings of shape (b, t, n_embd) x = F.rms_norm(x, (x.size(-1),)) # @Grad62304977 x0 = x v1 = None for block in self.transformer.h: x, v1 = block(x, v1, x0) x = F.rms_norm(x, (x.size(-1),)) logits = self.lm_head(x) logits = 30 * torch.tanh(logits / 30) # @Grad62304977 logits = logits.float() loss = F.cross_entropy(logits.view(-1, logits.size(-1)), target.view(-1)) return loss.float() # ----------------------------------------------------------------------------- # Our own simple Distributed Data Loader def _peek_data_shard(filename): # only reads the header, returns header data with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) if header[0] != 20240520: print("ERROR: magic number mismatch in the data .bin file!") print("---> HINT: Are you passing in a correct file with --input_bin?") print("---> HINT: Dataset encoding changed recently, re-run data prepro or refer again to README") print("---> HINT: For example re-run: `python dev/data/tinyshakespeare.py`, then re-try") exit(1) assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) return ntok # for now just return the number of tokens def _load_data_shard(filename): with open(filename, "rb") as f: # first read the header, which is 256 int32 integers (4 bytes each) header = np.frombuffer(f.read(256*4), dtype=np.int32) assert header[0] == 20240520, "magic number mismatch in the data .bin file" assert header[1] == 1, "unsupported version" ntok = header[2] # number of tokens (claimed) # the rest of it are tokens, stored as uint16 tokens = np.frombuffer(f.read(), dtype=np.uint16) assert len(tokens) == ntok, "number of tokens read does not match header?" return tokens class DistributedDataLoader: def __init__(self, filename_pattern, B, T, process_rank, num_processes): self.process_rank = process_rank self.num_processes = num_processes self.B = B self.T = T # glob files that match the pattern self.files = sorted(glob.glob(filename_pattern)) assert len(self.files) > 0, f"did not find any files that match the pattern {filename_pattern}" # load and validate all data shards, count number of tokens in total ntok_total = 0 for fname in self.files: shard_ntok = _peek_data_shard(fname) assert shard_ntok >= num_processes * B * T + 1 ntok_total += int(shard_ntok) self.ntok_total = ntok_total # kick things off self.reset() def reset(self): self.current_shard = 0 self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def advance(self): # advance to next data shard self.current_shard = (self.current_shard + 1) % len(self.files) self.current_position = self.process_rank * self.B * self.T self.tokens = _load_data_shard(self.files[self.current_shard]) def next_batch(self): B = self.B T = self.T buf = self.tokens[self.current_position : self.current_position+B*T+1] buf = torch.tensor(buf.astype(np.int32), dtype=torch.long) x = (buf[:-1]).view(B, T) # inputs y = (buf[1:]).view(B, T) # targets # advance current position and load next shard if necessary self.current_position += B * T * self.num_processes if self.current_position + (B * T * self.num_processes + 1) > len(self.tokens): self.advance() return x.cuda(), y.cuda() # ----------------------------------------------------------------------------- # int main @dataclass class Hyperparameters: # data hyperparams input_bin : str = 'data/fineweb10B/fineweb_train_*.bin' # input .bin to train on input_val_bin : str = 'data/fineweb10B/fineweb_val_*.bin' # input .bin to eval validation loss on # optimization hyperparams batch_size : int = 8*64 # batch size, in sequences, across all devices device_batch_size : int = 16 # batch size, in sequences, per device sequence_length : int = 1024 # sequence length, in tokens num_iterations : int = 3242 # number of iterations to run warmup_iters : int = 0 warmdown_iters : int = 926 # number of iterations of linear warmup/warmdown for triangular or trapezoidal schedule weight_decay : float = 0 # evaluation and logging hyperparams val_loss_every : int = 125 # every how many steps to evaluate val loss? 0 for only at the end val_tokens : int = 10485760 # how many tokens of validation data? it's important to keep this fixed for consistent comparisons save_every : int = 0 # every how many steps to save the checkpoint? 0 for only at the end args = Hyperparameters() # set up DDP (distributed data parallel). torchrun sets this env variable assert torch.cuda.is_available() dist.init_process_group(backend='nccl') ddp_rank = int(os.environ['RANK']) ddp_local_rank = int(os.environ['LOCAL_RANK']) ddp_world_size = int(os.environ['WORLD_SIZE']) device = f'cuda:{ddp_local_rank}' torch.cuda.set_device(device) print(f"using device: {device}") master_process = (ddp_rank == 0) # this process will do logging, checkpointing etc. # convenience variables B, T = args.device_batch_size, args.sequence_length # calculate the number of steps to take in the val loop. assert args.val_tokens % (B * T * ddp_world_size) == 0 val_steps = args.val_tokens // (B * T * ddp_world_size) # calculate the steps of gradient accumulation required to attain the desired global batch size. assert args.batch_size % (B * ddp_world_size) == 0 train_accumulation_steps = args.batch_size // (B * ddp_world_size) # load tokens train_loader = DistributedDataLoader(args.input_bin, B, T, ddp_rank, ddp_world_size) val_loader = DistributedDataLoader(args.input_val_bin, B, T, ddp_rank, ddp_world_size) if master_process: print(f"Training DataLoader: total number of tokens: {train_loader.ntok_total} across {len(train_loader.files)} files") print(f"Validation DataLoader: total number of tokens: {val_loader.ntok_total} across {len(val_loader.files)} files") x, y = train_loader.next_batch() # there are only 50257 unique GPT-2 tokens; we extend to nearest multiple of 128 for efficiency. suggested to me by @Grad62304977. # this originates from Karpathy's experiments. num_vocab = 50304 model = GPT(GPTConfig(vocab_size=num_vocab, n_layer=12, n_head=6, n_embd=768)) model = model.cuda().bfloat16() for m in model.modules(): if isinstance(m, CastedLinear): m.float() if hasattr(config, "coordinate_descent_tuning"): config.coordinate_descent_tuning = True # suggested by @Chillee model = torch.compile(model) # here we wrap model into DDP container model = DDP(model, device_ids=[ddp_local_rank]) raw_model = model.module # always contains the "raw" unwrapped model # CUDNN attention is ~4ms faster than Flash, but doesn't get selected by default in PyTorch 2.5.1 from torch.backends.cuda import enable_cudnn_sdp, enable_flash_sdp, enable_math_sdp, enable_mem_efficient_sdp enable_cudnn_sdp(True) enable_flash_sdp(False) enable_mem_efficient_sdp(False) enable_math_sdp(False) # init the optimizer(s) optimizer1 = torch.optim.Adam([raw_model.transformer.wte.weight], lr=0.3, betas=(0.9, 0.95), fused=True) optimizer2 = torch.optim.Adam([raw_model.lm_head.weight], lr=0.002, betas=(0.9, 0.95), fused=True) params = list(raw_model.transformer.h.parameters()) matrix_params = [p for p in params if p.ndim == 2] scalar_params = [p for p in params if p.ndim < 2] optimizer3 = Muon(matrix_params, lr=0.02, momentum=0.95) optimizer4 = torch.optim.Adam(scalar_params, lr=0.02, betas=(0.9, 0.95), fused=True) # note that this learning rate is neither sensitive nor tuned optimizers = [optimizer1, optimizer2, optimizer3, optimizer4] # learning rate decay scheduler (linear warmup and warmdown) def get_lr(it): assert it <= args.num_iterations # 1) linear warmup for warmup_iters steps if it < args.warmup_iters: return (it+1) / args.warmup_iters # 2) constant lr for a while elif it < args.num_iterations - args.warmdown_iters: return 1.0 # 3) linear warmdown else: decay_ratio = (args.num_iterations - it) / args.warmdown_iters return decay_ratio schedulers = [torch.optim.lr_scheduler.LambdaLR(opt, get_lr) for opt in optimizers] # begin logging if master_process: run_id = str(uuid.uuid4()) logdir = 'logs/%s/' % run_id os.makedirs(logdir, exist_ok=True) logfile = 'logs/%s.txt' % run_id # create the log file with open(logfile, "w") as f: # begin the log by printing this file (the Python code) f.write('='*100 + '\n') f.write(code) f.write('='*100 + '\n') # log information about the hardware/software environment this is running on # and print the full `nvidia-smi` to file f.write(f"Running pytorch {torch.version.__version__} compiled for CUDA {torch.version.cuda}\nnvidia-smi:\n") import subprocess result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) f.write(f'{result.stdout}\n') f.write('='*100 + '\n') training_time_ms = 0 # start the clock torch.cuda.synchronize() t0 = time.time() # begin training train_loader.reset() for step in range(args.num_iterations + 1): last_step = (step == args.num_iterations) # This effectively ignores timing first 10 steps, which are slower for weird reasons. # Alternately, and slightly more correctly in terms of benchmarking, we could do 10 # steps with dummy data first, and then re-initialize the model and reset the loader. if step == 10: training_time_ms = 0 t0 = time.time() timed_steps = float('nan') if step <= 11 else (step - 10) + 1 # <= 11 to avoid bug in val # once in a while evaluate the validation dataset if (last_step or (args.val_loss_every > 0 and step % args.val_loss_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # run validation batches model.eval() val_loader.reset() val_loss = 0.0 for _ in range(val_steps): with torch.no_grad(): x_val, y_val = val_loader.next_batch() val_loss += model(x_val, y_val) dist.all_reduce(val_loss, op=dist.ReduceOp.AVG) val_loss /= val_steps # log val loss to console and to logfile if master_process: print(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms') with open(logfile, "a") as f: f.write(f'step:{step}/{args.num_iterations} val_loss:{val_loss:.4f} train_time:{training_time_ms:.0f}ms step_avg:{training_time_ms/(timed_steps-1):.2f}ms\n') # start the clock again torch.cuda.synchronize() t0 = time.time() if master_process and (last_step or (args.save_every > 0 and step % args.save_every == 0)): # stop the clock torch.cuda.synchronize() training_time_ms += 1000 * (time.time() - t0) # save the state of the training process log = dict(step=step, code=code, model=raw_model.state_dict(), optimizers=[opt.state_dict() for opt in optimizers]) torch.save(log, 'logs/%s/state_step%06d.pt' % (run_id, step)) # start the clock again torch.cuda.synchronize() t0 = time.time() # bit confusing: we want to make sure to eval on 0th iteration # but also after the very last iteration. so we loop for step <= num_iterations # instead of just < num_iterations (one extra due to <=), only to do # the validation/sampling one last time, and then we break right here as we're done. if last_step: break # --------------- TRAINING SECTION BEGIN ----------------- model.train() for i in range(1, train_accumulation_steps+1): # forward pass loss = model(x, y) train_loss = loss.detach() # advance the dataset for the next batch x, y = train_loader.next_batch() # backward pass if i < train_accumulation_steps: with model.no_sync(): # there's no need to sync gradients every accumulation step loss.backward() else: loss.backward() # just sync on the last step for p in model.parameters(): p.grad /= train_accumulation_steps # momentum warmup for Muon frac = min(step/500, 1) optimizer3.param_groups[0]['momentum'] = (1 - frac) * 0.85 + frac * 0.95 # step the optimizers and schedulers for opt, sched in zip(optimizers, schedulers): opt.step() sched.step() # null the gradients model.zero_grad(set_to_none=True) # --------------- TRAINING SECTION END ------------------- # everything that follows now is just diagnostics, prints, logging, etc. #dist.all_reduce(train_loss, op=dist.ReduceOp.AVG) # all-reducing the training loss would be more correct in terms of logging, but slower if master_process: approx_time = training_time_ms + 1000 * (time.time() - t0) print(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms") with open(logfile, "a") as f: f.write(f"step:{step+1}/{args.num_iterations} train_loss:{train_loss.item():.4f} train_time:{approx_time:.0f}ms step_avg:{approx_time/timed_steps:.2f}ms\n") if master_process: print(f"peak memory consumption: {torch.cuda.max_memory_allocated() // 1024 // 1024} MiB") # ------------------------------------------------------------------------- # clean up nice dist.destroy_process_group() ==================================================================================================== Running pytorch 2.5.1+cu124 compiled for CUDA 12.4 nvidia-smi: Sun Nov 10 00:59:47 2024 +-----------------------------------------------------------------------------------------+ | NVIDIA-SMI 550.90.07 Driver Version: 550.90.07 CUDA Version: 12.4 | |-----------------------------------------+------------------------+----------------------+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC | | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. | | | | MIG M. | |=========================================+========================+======================| | 0 NVIDIA GeForce RTX 3090 On | 00000000:01:00.0 Off | N/A | | 0% 36C P2 108W / 250W | 2239MiB / 24576MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 1 NVIDIA GeForce RTX 3090 On | 00000000:02:00.0 Off | N/A | | 0% 37C P2 118W / 250W | 2239MiB / 24576MiB | 31% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 2 NVIDIA GeForce RTX 3090 On | 00000000:03:00.0 Off | N/A | | 0% 39C P2 132W / 250W | 2239MiB / 24576MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 3 NVIDIA GeForce RTX 3090 On | 00000000:04:00.0 Off | N/A | | 33% 35C P2 130W / 250W | 2239MiB / 24576MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 4 NVIDIA GeForce RTX 3090 On | 00000000:05:00.0 Off | N/A | | 34% 37C P2 130W / 250W | 2239MiB / 24576MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 5 NVIDIA GeForce RTX 3090 On | 00000000:81:00.0 Off | N/A | | 0% 38C P2 133W / 250W | 2239MiB / 24576MiB | 8% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 6 NVIDIA GeForce RTX 3090 On | 00000000:82:00.0 Off | N/A | | 0% 35C P2 136W / 250W | 2239MiB / 24576MiB | 0% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ | 7 NVIDIA GeForce RTX 3090 On | 00000000:83:00.0 Off | N/A | | 0% 35C P2 119W / 250W | 2239MiB / 24576MiB | 16% Default | | | | N/A | +-----------------------------------------+------------------------+----------------------+ +-----------------------------------------------------------------------------------------+ | Processes: | | GPU GI CI PID Type Process name GPU Memory | | ID ID Usage | |=========================================================================================| +-----------------------------------------------------------------------------------------+ ==================================================================================================== step:0/3242 val_loss:10.8258 train_time:382ms step_avg:nanms step:1/3242 train_loss:10.8258 train_time:101423ms step_avg:nanms step:2/3242 train_loss:10.5774 train_time:103367ms step_avg:nanms step:3/3242 train_loss:9.9378 train_time:105482ms step_avg:nanms step:4/3242 train_loss:8.9455 train_time:107581ms step_avg:nanms step:5/3242 train_loss:8.1827 train_time:109692ms step_avg:nanms step:6/3242 train_loss:7.3693 train_time:111802ms step_avg:nanms step:7/3242 train_loss:7.0055 train_time:113915ms step_avg:nanms step:8/3242 train_loss:5.9723 train_time:116026ms step_avg:nanms step:9/3242 train_loss:6.9704 train_time:118142ms step_avg:nanms step:10/3242 train_loss:6.7147 train_time:120252ms step_avg:nanms step:11/3242 train_loss:6.6959 train_time:1703ms step_avg:nanms step:12/3242 train_loss:6.5138 train_time:3816ms step_avg:nanms step:13/3242 train_loss:6.5906 train_time:5924ms step_avg:1974.58ms step:14/3242 train_loss:6.5442 train_time:8039ms step_avg:2009.84ms step:15/3242 train_loss:6.4742 train_time:10153ms step_avg:2030.66ms step:16/3242 train_loss:6.3138 train_time:12271ms step_avg:2045.20ms step:17/3242 train_loss:6.1143 train_time:14378ms step_avg:2054.00ms step:18/3242 train_loss:6.5037 train_time:16499ms step_avg:2062.40ms step:19/3242 train_loss:6.0189 train_time:18613ms step_avg:2068.09ms step:20/3242 train_loss:6.3428 train_time:20722ms step_avg:2072.21ms step:21/3242 train_loss:6.2252 train_time:22841ms step_avg:2076.50ms step:22/3242 train_loss:6.1434 train_time:24957ms step_avg:2079.73ms step:23/3242 train_loss:6.2748 train_time:27075ms step_avg:2082.68ms step:24/3242 train_loss:5.9630 train_time:29194ms step_avg:2085.27ms step:25/3242 train_loss:6.0908 train_time:31305ms step_avg:2086.99ms step:26/3242 train_loss:6.1666 train_time:33412ms step_avg:2088.24ms step:27/3242 train_loss:6.1281 train_time:35527ms step_avg:2089.83ms step:28/3242 train_loss:6.0608 train_time:37644ms step_avg:2091.34ms step:29/3242 train_loss:5.9877 train_time:39759ms step_avg:2092.59ms step:30/3242 train_loss:6.3199 train_time:41870ms step_avg:2093.51ms step:31/3242 train_loss:6.1545 train_time:43988ms step_avg:2094.66ms step:32/3242 train_loss:5.8008 train_time:46112ms step_avg:2096.02ms step:33/3242 train_loss:5.8494 train_time:48237ms step_avg:2097.25ms step:34/3242 train_loss:5.8021 train_time:50354ms step_avg:2098.07ms step:35/3242 train_loss:6.1004 train_time:52471ms step_avg:2098.85ms step:36/3242 train_loss:5.9750 train_time:54588ms step_avg:2099.53ms step:37/3242 train_loss:5.9919 train_time:56697ms step_avg:2099.89ms step:38/3242 train_loss:6.1530 train_time:58813ms step_avg:2100.46ms step:39/3242 train_loss:6.1604 train_time:60926ms step_avg:2100.89ms step:40/3242 train_loss:5.5553 train_time:63039ms step_avg:2101.31ms step:41/3242 train_loss:6.0810 train_time:65155ms step_avg:2101.78ms step:42/3242 train_loss:5.7906 train_time:67276ms step_avg:2102.38ms step:43/3242 train_loss:5.9363 train_time:69389ms step_avg:2102.69ms step:44/3242 train_loss:5.6669 train_time:71498ms step_avg:2102.87ms step:45/3242 train_loss:5.5522 train_time:73616ms step_avg:2103.31ms step:46/3242 train_loss:5.6694 train_time:75736ms step_avg:2103.77ms step:47/3242 train_loss:5.6245 train_time:77857ms step_avg:2104.24ms step:48/3242 train_loss:5.7115 train_time:79973ms step_avg:2104.55ms step:49/3242 train_loss:5.5189 train_time:82086ms step_avg:2104.77ms step:50/3242 train_loss:5.5554 train_time:84202ms step_avg:2105.04ms step:51/3242 train_loss:5.4456 train_time:86319ms step_avg:2105.34ms step:52/3242 train_loss:5.7622 train_time:88435ms step_avg:2105.60ms step:53/3242 train_loss:5.5252 train_time:90554ms step_avg:2105.90ms step:54/3242 train_loss:5.5044 train_time:92670ms step_avg:2106.14ms step:55/3242 train_loss:5.4659 train_time:94795ms step_avg:2106.56ms step:56/3242 train_loss:5.4679 train_time:96915ms step_avg:2106.85ms step:57/3242 train_loss:5.3641 train_time:99031ms step_avg:2107.04ms step:58/3242 train_loss:5.4351 train_time:101153ms step_avg:2107.36ms step:59/3242 train_loss:5.3387 train_time:103276ms step_avg:2107.67ms step:60/3242 train_loss:5.4442 train_time:105385ms step_avg:2107.70ms step:61/3242 train_loss:5.4330 train_time:107508ms step_avg:2108.00ms step:62/3242 train_loss:5.3028 train_time:109627ms step_avg:2108.21ms step:63/3242 train_loss:5.3533 train_time:111741ms step_avg:2108.32ms step:64/3242 train_loss:5.2518 train_time:113865ms step_avg:2108.61ms step:65/3242 train_loss:5.1106 train_time:115980ms step_avg:2108.72ms step:66/3242 train_loss:5.2589 train_time:118091ms step_avg:2108.77ms step:67/3242 train_loss:5.2065 train_time:120209ms step_avg:2108.92ms step:68/3242 train_loss:5.3377 train_time:122325ms step_avg:2109.06ms step:69/3242 train_loss:5.2252 train_time:124440ms step_avg:2109.15ms step:70/3242 train_loss:5.2604 train_time:126562ms step_avg:2109.36ms step:71/3242 train_loss:5.2228 train_time:128678ms step_avg:2109.48ms step:72/3242 train_loss:5.1890 train_time:130798ms step_avg:2109.64ms step:73/3242 train_loss:5.2928 train_time:132917ms step_avg:2109.79ms step:74/3242 train_loss:5.0079 train_time:135039ms step_avg:2109.98ms step:75/3242 train_loss:5.2693 train_time:137157ms step_avg:2110.10ms step:76/3242 train_loss:5.2311 train_time:139271ms step_avg:2110.16ms step:77/3242 train_loss:5.3761 train_time:141395ms step_avg:2110.37ms step:78/3242 train_loss:5.1247 train_time:143516ms step_avg:2110.53ms step:79/3242 train_loss:5.0737 train_time:145630ms step_avg:2110.57ms step:80/3242 train_loss:5.1913 train_time:147750ms step_avg:2110.71ms step:81/3242 train_loss:5.0469 train_time:149873ms step_avg:2110.89ms step:82/3242 train_loss:5.3454 train_time:151992ms step_avg:2111.00ms step:83/3242 train_loss:5.1707 train_time:154111ms step_avg:2111.11ms step:84/3242 train_loss:4.9371 train_time:156225ms step_avg:2111.15ms step:85/3242 train_loss:5.0544 train_time:158344ms step_avg:2111.25ms step:86/3242 train_loss:5.1726 train_time:160463ms step_avg:2111.35ms step:87/3242 train_loss:5.1411 train_time:162589ms step_avg:2111.55ms step:88/3242 train_loss:4.9101 train_time:164704ms step_avg:2111.59ms step:89/3242 train_loss:5.0530 train_time:166824ms step_avg:2111.69ms step:90/3242 train_loss:4.9850 train_time:168946ms step_avg:2111.83ms step:91/3242 train_loss:5.0930 train_time:171064ms step_avg:2111.90ms step:92/3242 train_loss:5.0010 train_time:173183ms step_avg:2111.98ms step:93/3242 train_loss:5.2176 train_time:175296ms step_avg:2112.01ms step:94/3242 train_loss:4.9870 train_time:177417ms step_avg:2112.11ms step:95/3242 train_loss:4.7550 train_time:179540ms step_avg:2112.24ms step:96/3242 train_loss:4.8897 train_time:181655ms step_avg:2112.27ms step:97/3242 train_loss:5.1393 train_time:183777ms step_avg:2112.38ms step:98/3242 train_loss:4.9002 train_time:185904ms step_avg:2112.54ms step:99/3242 train_loss:4.8429 train_time:188019ms step_avg:2112.57ms step:100/3242 train_loss:4.9818 train_time:190137ms step_avg:2112.64ms step:101/3242 train_loss:4.8077 train_time:192259ms step_avg:2112.73ms step:102/3242 train_loss:4.9317 train_time:194374ms step_avg:2112.76ms step:103/3242 train_loss:4.6536 train_time:196496ms step_avg:2112.86ms step:104/3242 train_loss:4.6406 train_time:198617ms step_avg:2112.95ms step:105/3242 train_loss:5.3221 train_time:200731ms step_avg:2112.96ms step:106/3242 train_loss:5.9453 train_time:202851ms step_avg:2113.04ms step:107/3242 train_loss:4.7554 train_time:204972ms step_avg:2113.12ms step:108/3242 train_loss:4.7332 train_time:207091ms step_avg:2113.17ms step:109/3242 train_loss:5.3456 train_time:209218ms step_avg:2113.31ms step:110/3242 train_loss:5.0092 train_time:211331ms step_avg:2113.31ms step:111/3242 train_loss:4.6998 train_time:213455ms step_avg:2113.42ms step:112/3242 train_loss:5.0955 train_time:215570ms step_avg:2113.43ms step:113/3242 train_loss:4.3955 train_time:217689ms step_avg:2113.49ms step:114/3242 train_loss:4.9148 train_time:219809ms step_avg:2113.55ms step:115/3242 train_loss:4.8144 train_time:221919ms step_avg:2113.51ms step:116/3242 train_loss:4.8479 train_time:224043ms step_avg:2113.62ms step:117/3242 train_loss:4.7231 train_time:226165ms step_avg:2113.69ms step:118/3242 train_loss:4.7948 train_time:228290ms step_avg:2113.79ms step:119/3242 train_loss:4.5434 train_time:230405ms step_avg:2113.81ms step:120/3242 train_loss:4.7632 train_time:232523ms step_avg:2113.85ms step:121/3242 train_loss:4.6667 train_time:234652ms step_avg:2113.98ms step:122/3242 train_loss:4.7445 train_time:236762ms step_avg:2113.95ms step:123/3242 train_loss:4.6341 train_time:238884ms step_avg:2114.02ms step:124/3242 train_loss:4.7462 train_time:240999ms step_avg:2114.03ms step:125/3242 train_loss:4.4827 train_time:243121ms step_avg:2114.09ms step:125/3242 val_loss:4.7203 train_time:243532ms step_avg:2117.67ms step:126/3242 train_loss:4.5589 train_time:245241ms step_avg:2114.15ms step:127/3242 train_loss:4.7844 train_time:247360ms step_avg:2114.19ms step:128/3242 train_loss:4.7880 train_time:249477ms step_avg:2114.21ms step:129/3242 train_loss:4.7758 train_time:251594ms step_avg:2114.23ms step:130/3242 train_loss:4.7504 train_time:253710ms step_avg:2114.25ms step:131/3242 train_loss:4.5175 train_time:255832ms step_avg:2114.31ms step:132/3242 train_loss:4.6470 train_time:257952ms step_avg:2114.36ms step:133/3242 train_loss:4.7635 train_time:260078ms step_avg:2114.46ms step:134/3242 train_loss:4.6395 train_time:262194ms step_avg:2114.47ms step:135/3242 train_loss:4.4371 train_time:264316ms step_avg:2114.52ms step:136/3242 train_loss:4.5697 train_time:266435ms step_avg:2114.57ms step:137/3242 train_loss:4.7636 train_time:268551ms step_avg:2114.57ms step:138/3242 train_loss:4.5428 train_time:270670ms step_avg:2114.61ms step:139/3242 train_loss:4.7854 train_time:272784ms step_avg:2114.61ms step:140/3242 train_loss:4.5826 train_time:274903ms step_avg:2114.64ms step:141/3242 train_loss:4.4881 train_time:277023ms step_avg:2114.68ms step:142/3242 train_loss:4.5546 train_time:279144ms step_avg:2114.73ms step:143/3242 train_loss:4.7156 train_time:281263ms step_avg:2114.76ms step:144/3242 train_loss:4.6157 train_time:283390ms step_avg:2114.85ms step:145/3242 train_loss:4.6010 train_time:285505ms step_avg:2114.86ms step:146/3242 train_loss:4.6429 train_time:287624ms step_avg:2114.88ms step:147/3242 train_loss:4.4391 train_time:289742ms step_avg:2114.90ms step:148/3242 train_loss:4.4604 train_time:291865ms step_avg:2114.96ms step:149/3242 train_loss:4.5877 train_time:293981ms step_avg:2114.97ms step:150/3242 train_loss:4.4921 train_time:296103ms step_avg:2115.02ms step:151/3242 train_loss:4.5512 train_time:298219ms step_avg:2115.02ms step:152/3242 train_loss:4.4796 train_time:300342ms step_avg:2115.08ms step:153/3242 train_loss:4.5415 train_time:302457ms step_avg:2115.08ms step:154/3242 train_loss:4.4985 train_time:304578ms step_avg:2115.12ms step:155/3242 train_loss:4.2095 train_time:306693ms step_avg:2115.12ms step:156/3242 train_loss:4.3933 train_time:308813ms step_avg:2115.16ms step:157/3242 train_loss:4.6887 train_time:310935ms step_avg:2115.20ms step:158/3242 train_loss:4.4396 train_time:313059ms step_avg:2115.26ms step:159/3242 train_loss:4.5147 train_time:315179ms step_avg:2115.29ms step:160/3242 train_loss:4.3278 train_time:317294ms step_avg:2115.29ms step:161/3242 train_loss:4.2670 train_time:319415ms step_avg:2115.33ms step:162/3242 train_loss:4.4822 train_time:321540ms step_avg:2115.39ms step:163/3242 train_loss:4.4199 train_time:323654ms step_avg:2115.38ms step:164/3242 train_loss:4.3540 train_time:325778ms step_avg:2115.44ms step:165/3242 train_loss:4.3405 train_time:327894ms step_avg:2115.45ms step:166/3242 train_loss:4.5672 train_time:330017ms step_avg:2115.49ms step:167/3242 train_loss:4.5475 train_time:332137ms step_avg:2115.53ms step:168/3242 train_loss:4.5596 train_time:334259ms step_avg:2115.57ms step:169/3242 train_loss:4.3608 train_time:336368ms step_avg:2115.52ms step:170/3242 train_loss:4.3476 train_time:338487ms step_avg:2115.55ms step:171/3242 train_loss:3.6764 train_time:340611ms step_avg:2115.59ms step:172/3242 train_loss:4.3171 train_time:342725ms step_avg:2115.59ms step:173/3242 train_loss:4.2759 train_time:344850ms step_avg:2115.64ms step:174/3242 train_loss:4.7857 train_time:346972ms step_avg:2115.69ms step:175/3242 train_loss:4.3976 train_time:349086ms step_avg:2115.67ms step:176/3242 train_loss:4.4331 train_time:351206ms step_avg:2115.70ms step:177/3242 train_loss:4.7170 train_time:353318ms step_avg:2115.68ms step:178/3242 train_loss:4.4232 train_time:355440ms step_avg:2115.72ms step:179/3242 train_loss:4.2649 train_time:357559ms step_avg:2115.73ms step:180/3242 train_loss:4.2347 train_time:359680ms step_avg:2115.77ms step:181/3242 train_loss:4.2388 train_time:361806ms step_avg:2115.82ms step:182/3242 train_loss:4.2478 train_time:363924ms step_avg:2115.84ms step:183/3242 train_loss:4.1559 train_time:366037ms step_avg:2115.82ms step:184/3242 train_loss:4.7181 train_time:368164ms step_avg:2115.88ms step:185/3242 train_loss:4.3235 train_time:370281ms step_avg:2115.89ms step:186/3242 train_loss:4.3490 train_time:372395ms step_avg:2115.88ms step:187/3242 train_loss:4.2133 train_time:374516ms step_avg:2115.91ms step:188/3242 train_loss:4.3264 train_time:376635ms step_avg:2115.93ms step:189/3242 train_loss:4.2206 train_time:378760ms step_avg:2115.98ms step:190/3242 train_loss:4.2502 train_time:380878ms step_avg:2115.99ms step:191/3242 train_loss:4.1693 train_time:383116ms step_avg:2116.66ms step:192/3242 train_loss:4.2480 train_time:385233ms step_avg:2116.66ms step:193/3242 train_loss:4.4002 train_time:387358ms step_avg:2116.71ms step:194/3242 train_loss:4.2867 train_time:389476ms step_avg:2116.72ms step:195/3242 train_loss:4.9303 train_time:391593ms step_avg:2116.72ms step:196/3242 train_loss:4.2364 train_time:393720ms step_avg:2116.78ms step:197/3242 train_loss:4.1538 train_time:395846ms step_avg:2116.82ms step:198/3242 train_loss:4.2131 train_time:397959ms step_avg:2116.80ms step:199/3242 train_loss:4.1908 train_time:400080ms step_avg:2116.83ms step:200/3242 train_loss:4.2337 train_time:402197ms step_avg:2116.83ms step:201/3242 train_loss:4.0140 train_time:404316ms step_avg:2116.84ms step:202/3242 train_loss:4.3344 train_time:406437ms step_avg:2116.86ms step:203/3242 train_loss:4.2586 train_time:408555ms step_avg:2116.87ms step:204/3242 train_loss:4.2029 train_time:410673ms step_avg:2116.87ms step:205/3242 train_loss:4.2030 train_time:412793ms step_avg:2116.89ms step:206/3242 train_loss:4.1450 train_time:414910ms step_avg:2116.89ms step:207/3242 train_loss:4.1685 train_time:417030ms step_avg:2116.91ms step:208/3242 train_loss:4.4303 train_time:419148ms step_avg:2116.91ms step:209/3242 train_loss:4.3124 train_time:421264ms step_avg:2116.90ms step:210/3242 train_loss:4.3942 train_time:423391ms step_avg:2116.96ms step:211/3242 train_loss:4.2261 train_time:425511ms step_avg:2116.97ms step:212/3242 train_loss:4.2953 train_time:427629ms step_avg:2116.98ms step:213/3242 train_loss:4.1563 train_time:429753ms step_avg:2117.01ms step:214/3242 train_loss:4.2233 train_time:431873ms step_avg:2117.02ms step:215/3242 train_loss:4.0192 train_time:433989ms step_avg:2117.02ms step:216/3242 train_loss:4.0711 train_time:436105ms step_avg:2117.01ms step:217/3242 train_loss:4.1623 train_time:438223ms step_avg:2117.02ms step:218/3242 train_loss:4.0778 train_time:440339ms step_avg:2117.02ms step:219/3242 train_loss:4.2189 train_time:442467ms step_avg:2117.07ms step:220/3242 train_loss:4.2668 train_time:444585ms step_avg:2117.07ms step:221/3242 train_loss:4.2517 train_time:446705ms step_avg:2117.09ms step:222/3242 train_loss:4.0999 train_time:448825ms step_avg:2117.10ms step:223/3242 train_loss:3.8366 train_time:450943ms step_avg:2117.10ms step:224/3242 train_loss:4.6873 train_time:453057ms step_avg:2117.09ms step:225/3242 train_loss:4.1361 train_time:455189ms step_avg:2117.16ms step:226/3242 train_loss:3.8550 train_time:457316ms step_avg:2117.21ms step:227/3242 train_loss:4.1910 train_time:459434ms step_avg:2117.21ms step:228/3242 train_loss:4.2577 train_time:461558ms step_avg:2117.24ms step:229/3242 train_loss:4.0672 train_time:463673ms step_avg:2117.23ms step:230/3242 train_loss:4.0982 train_time:465792ms step_avg:2117.24ms step:231/3242 train_loss:4.1485 train_time:467910ms step_avg:2117.24ms step:232/3242 train_loss:3.8542 train_time:470035ms step_avg:2117.27ms step:233/3242 train_loss:4.2951 train_time:472149ms step_avg:2117.26ms step:234/3242 train_loss:4.0259 train_time:474274ms step_avg:2117.30ms step:235/3242 train_loss:3.7392 train_time:476397ms step_avg:2117.32ms step:236/3242 train_loss:4.2177 train_time:478514ms step_avg:2117.32ms step:237/3242 train_loss:4.2257 train_time:480627ms step_avg:2117.30ms step:238/3242 train_loss:3.9698 train_time:482746ms step_avg:2117.31ms step:239/3242 train_loss:4.3302 train_time:484866ms step_avg:2117.32ms step:240/3242 train_loss:4.1833 train_time:486986ms step_avg:2117.33ms step:241/3242 train_loss:3.9999 train_time:489100ms step_avg:2117.32ms step:242/3242 train_loss:3.7538 train_time:491219ms step_avg:2117.32ms step:243/3242 train_loss:4.1753 train_time:493343ms step_avg:2117.35ms step:244/3242 train_loss:3.9924 train_time:495454ms step_avg:2117.33ms step:245/3242 train_loss:4.3998 train_time:497577ms step_avg:2117.35ms step:246/3242 train_loss:4.4477 train_time:499694ms step_avg:2117.35ms step:247/3242 train_loss:4.1146 train_time:501822ms step_avg:2117.39ms step:248/3242 train_loss:4.1786 train_time:503942ms step_avg:2117.40ms step:249/3242 train_loss:4.0115 train_time:506059ms step_avg:2117.40ms step:250/3242 train_loss:3.9028 train_time:508181ms step_avg:2117.42ms step:250/3242 val_loss:4.1020 train_time:508593ms step_avg:2119.14ms step:251/3242 train_loss:4.2248 train_time:510306ms step_avg:2117.45ms step:252/3242 train_loss:4.1979 train_time:512423ms step_avg:2117.45ms step:253/3242 train_loss:3.7065 train_time:514536ms step_avg:2117.43ms step:254/3242 train_loss:4.1741 train_time:516660ms step_avg:2117.46ms step:255/3242 train_loss:4.0405 train_time:518780ms step_avg:2117.47ms step:256/3242 train_loss:4.1912 train_time:520898ms step_avg:2117.47ms step:257/3242 train_loss:3.9918 train_time:523017ms step_avg:2117.48ms step:258/3242 train_loss:3.8470 train_time:525134ms step_avg:2117.48ms step:259/3242 train_loss:4.2802 train_time:527257ms step_avg:2117.50ms step:260/3242 train_loss:4.0913 train_time:529374ms step_avg:2117.49ms step:261/3242 train_loss:4.4239 train_time:531491ms step_avg:2117.50ms step:262/3242 train_loss:4.1597 train_time:533609ms step_avg:2117.50ms step:263/3242 train_loss:4.0886 train_time:535736ms step_avg:2117.53ms step:264/3242 train_loss:4.0720 train_time:537853ms step_avg:2117.53ms step:265/3242 train_loss:3.9197 train_time:539974ms step_avg:2117.54ms step:266/3242 train_loss:4.0690 train_time:542083ms step_avg:2117.51ms step:267/3242 train_loss:3.8499 train_time:544211ms step_avg:2117.55ms step:268/3242 train_loss:4.1209 train_time:546330ms step_avg:2117.56ms step:269/3242 train_loss:3.8296 train_time:548444ms step_avg:2117.54ms step:270/3242 train_loss:4.2143 train_time:550568ms step_avg:2117.57ms step:271/3242 train_loss:4.0743 train_time:552688ms step_avg:2117.58ms step:272/3242 train_loss:3.9091 train_time:554806ms step_avg:2117.58ms step:273/3242 train_loss:4.1477 train_time:556929ms step_avg:2117.60ms step:274/3242 train_loss:4.1245 train_time:559051ms step_avg:2117.62ms step:275/3242 train_loss:4.2860 train_time:561167ms step_avg:2117.61ms step:276/3242 train_loss:3.9515 train_time:563286ms step_avg:2117.62ms step:277/3242 train_loss:3.8861 train_time:565401ms step_avg:2117.61ms step:278/3242 train_loss:4.0836 train_time:567523ms step_avg:2117.62ms step:279/3242 train_loss:3.9254 train_time:569642ms step_avg:2117.63ms step:280/3242 train_loss:4.4883 train_time:571759ms step_avg:2117.62ms step:281/3242 train_loss:4.0926 train_time:573878ms step_avg:2117.63ms step:282/3242 train_loss:4.0321 train_time:575995ms step_avg:2117.63ms step:283/3242 train_loss:3.9302 train_time:578110ms step_avg:2117.62ms step:284/3242 train_loss:3.9619 train_time:580233ms step_avg:2117.64ms step:285/3242 train_loss:4.1222 train_time:582359ms step_avg:2117.67ms step:286/3242 train_loss:4.3761 train_time:584480ms step_avg:2117.68ms step:287/3242 train_loss:3.8850 train_time:586595ms step_avg:2117.67ms step:288/3242 train_loss:4.2305 train_time:588714ms step_avg:2117.67ms step:289/3242 train_loss:4.0367 train_time:590833ms step_avg:2117.68ms step:290/3242 train_loss:4.0351 train_time:592957ms step_avg:2117.70ms step:291/3242 train_loss:4.1703 train_time:595072ms step_avg:2117.69ms step:292/3242 train_loss:3.9372 train_time:597196ms step_avg:2117.72ms step:293/3242 train_loss:3.9109 train_time:599312ms step_avg:2117.71ms step:294/3242 train_loss:4.1557 train_time:601425ms step_avg:2117.69ms step:295/3242 train_loss:3.9323 train_time:603540ms step_avg:2117.68ms step:296/3242 train_loss:4.0279 train_time:605658ms step_avg:2117.68ms step:297/3242 train_loss:3.9861 train_time:607776ms step_avg:2117.68ms step:298/3242 train_loss:4.1105 train_time:609898ms step_avg:2117.70ms step:299/3242 train_loss:4.0187 train_time:612016ms step_avg:2117.70ms step:300/3242 train_loss:4.0672 train_time:614141ms step_avg:2117.73ms step:301/3242 train_loss:4.0092 train_time:616260ms step_avg:2117.73ms step:302/3242 train_loss:3.8668 train_time:618380ms step_avg:2117.74ms step:303/3242 train_loss:3.8429 train_time:620510ms step_avg:2117.78ms step:304/3242 train_loss:3.9806 train_time:622621ms step_avg:2117.76ms step:305/3242 train_loss:5.6433 train_time:624746ms step_avg:2117.78ms step:306/3242 train_loss:3.9400 train_time:626866ms step_avg:2117.79ms step:307/3242 train_loss:3.9069 train_time:628984ms step_avg:2117.79ms step:308/3242 train_loss:4.2430 train_time:631101ms step_avg:2117.79ms step:309/3242 train_loss:3.8919 train_time:633217ms step_avg:2117.78ms step:310/3242 train_loss:4.1163 train_time:635337ms step_avg:2117.79ms step:311/3242 train_loss:3.7945 train_time:637460ms step_avg:2117.81ms step:312/3242 train_loss:3.7561 train_time:639577ms step_avg:2117.81ms step:313/3242 train_loss:4.1396 train_time:641700ms step_avg:2117.82ms step:314/3242 train_loss:4.2161 train_time:643816ms step_avg:2117.82ms step:315/3242 train_loss:3.7615 train_time:645932ms step_avg:2117.81ms step:316/3242 train_loss:3.8338 train_time:648047ms step_avg:2117.80ms step:317/3242 train_loss:4.1245 train_time:650173ms step_avg:2117.83ms step:318/3242 train_loss:3.9248 train_time:652296ms step_avg:2117.84ms step:319/3242 train_loss:4.0056 train_time:654412ms step_avg:2117.84ms step:320/3242 train_loss:4.4600 train_time:656533ms step_avg:2117.85ms step:321/3242 train_loss:3.8870 train_time:658651ms step_avg:2117.85ms step:322/3242 train_loss:4.1026 train_time:660768ms step_avg:2117.85ms step:323/3242 train_loss:4.3769 train_time:662896ms step_avg:2117.88ms step:324/3242 train_loss:4.1011 train_time:665015ms step_avg:2117.88ms step:325/3242 train_loss:4.1338 train_time:667130ms step_avg:2117.87ms step:326/3242 train_loss:3.7518 train_time:669252ms step_avg:2117.89ms step:327/3242 train_loss:3.9554 train_time:671367ms step_avg:2117.88ms step:328/3242 train_loss:4.1045 train_time:673491ms step_avg:2117.90ms step:329/3242 train_loss:3.9930 train_time:675609ms step_avg:2117.90ms step:330/3242 train_loss:3.8239 train_time:677726ms step_avg:2117.89ms step:331/3242 train_loss:3.9220 train_time:679850ms step_avg:2117.91ms step:332/3242 train_loss:4.3190 train_time:681969ms step_avg:2117.92ms step:333/3242 train_loss:3.9020 train_time:684087ms step_avg:2117.92ms step:334/3242 train_loss:3.9874 train_time:686206ms step_avg:2117.92ms step:335/3242 train_loss:4.0978 train_time:688330ms step_avg:2117.94ms step:336/3242 train_loss:4.6110 train_time:690454ms step_avg:2117.96ms step:337/3242 train_loss:4.7980 train_time:692568ms step_avg:2117.95ms step:338/3242 train_loss:4.0533 train_time:694683ms step_avg:2117.94ms step:339/3242 train_loss:3.8356 train_time:696794ms step_avg:2117.91ms step:340/3242 train_loss:4.0280 train_time:698919ms step_avg:2117.94ms step:341/3242 train_loss:4.2618 train_time:701035ms step_avg:2117.93ms step:342/3242 train_loss:4.0636 train_time:703147ms step_avg:2117.91ms step:343/3242 train_loss:3.7989 train_time:705270ms step_avg:2117.93ms step:344/3242 train_loss:3.9297 train_time:707380ms step_avg:2117.90ms step:345/3242 train_loss:3.8577 train_time:709509ms step_avg:2117.94ms step:346/3242 train_loss:3.6504 train_time:711624ms step_avg:2117.93ms step:347/3242 train_loss:3.7984 train_time:713736ms step_avg:2117.91ms step:348/3242 train_loss:3.9380 train_time:715849ms step_avg:2117.90ms step:349/3242 train_loss:3.8435 train_time:717969ms step_avg:2117.90ms step:350/3242 train_loss:3.5025 train_time:720081ms step_avg:2117.88ms step:351/3242 train_loss:3.6244 train_time:722200ms step_avg:2117.89ms step:352/3242 train_loss:4.0811 train_time:724321ms step_avg:2117.90ms step:353/3242 train_loss:3.9925 train_time:726442ms step_avg:2117.91ms step:354/3242 train_loss:3.9196 train_time:728559ms step_avg:2117.90ms step:355/3242 train_loss:4.1272 train_time:730672ms step_avg:2117.89ms step:356/3242 train_loss:4.1040 train_time:732796ms step_avg:2117.91ms step:357/3242 train_loss:4.0914 train_time:734913ms step_avg:2117.90ms step:358/3242 train_loss:3.7532 train_time:737038ms step_avg:2117.93ms step:359/3242 train_loss:4.1236 train_time:739153ms step_avg:2117.92ms step:360/3242 train_loss:3.4113 train_time:741272ms step_avg:2117.92ms step:361/3242 train_loss:3.9321 train_time:743388ms step_avg:2117.91ms step:362/3242 train_loss:3.9368 train_time:745502ms step_avg:2117.90ms step:363/3242 train_loss:3.8811 train_time:747619ms step_avg:2117.90ms step:364/3242 train_loss:3.8121 train_time:749733ms step_avg:2117.89ms step:365/3242 train_loss:4.0553 train_time:751857ms step_avg:2117.91ms step:366/3242 train_loss:4.2510 train_time:753975ms step_avg:2117.91ms step:367/3242 train_loss:3.9510 train_time:756090ms step_avg:2117.90ms step:368/3242 train_loss:3.8420 train_time:758216ms step_avg:2117.92ms step:369/3242 train_loss:3.4702 train_time:760324ms step_avg:2117.89ms step:370/3242 train_loss:3.9619 train_time:762442ms step_avg:2117.89ms step:371/3242 train_loss:3.7599 train_time:764562ms step_avg:2117.90ms step:372/3242 train_loss:3.7512 train_time:766684ms step_avg:2117.91ms step:373/3242 train_loss:4.0426 train_time:768805ms step_avg:2117.92ms step:374/3242 train_loss:3.9685 train_time:770924ms step_avg:2117.92ms step:375/3242 train_loss:3.8829 train_time:773049ms step_avg:2117.94ms step:375/3242 val_loss:3.9142 train_time:773463ms step_avg:2119.08ms step:376/3242 train_loss:3.7470 train_time:775175ms step_avg:2117.97ms step:377/3242 train_loss:4.1128 train_time:777289ms step_avg:2117.95ms step:378/3242 train_loss:3.8625 train_time:779410ms step_avg:2117.96ms step:379/3242 train_loss:3.9692 train_time:781527ms step_avg:2117.96ms step:380/3242 train_loss:4.0500 train_time:783648ms step_avg:2117.97ms step:381/3242 train_loss:3.8479 train_time:785906ms step_avg:2118.34ms step:382/3242 train_loss:3.8645 train_time:788024ms step_avg:2118.34ms step:383/3242 train_loss:3.9453 train_time:790137ms step_avg:2118.33ms step:384/3242 train_loss:3.8317 train_time:792257ms step_avg:2118.33ms step:385/3242 train_loss:3.6312 train_time:794374ms step_avg:2118.33ms step:386/3242 train_loss:4.0761 train_time:796499ms step_avg:2118.35ms step:387/3242 train_loss:3.9211 train_time:798627ms step_avg:2118.37ms step:388/3242 train_loss:3.8425 train_time:800742ms step_avg:2118.37ms step:389/3242 train_loss:4.0243 train_time:802865ms step_avg:2118.38ms step:390/3242 train_loss:4.0887 train_time:804976ms step_avg:2118.36ms step:391/3242 train_loss:3.7857 train_time:807093ms step_avg:2118.36ms step:392/3242 train_loss:3.6253 train_time:809215ms step_avg:2118.37ms step:393/3242 train_loss:3.7436 train_time:811337ms step_avg:2118.37ms step:394/3242 train_loss:4.0610 train_time:813454ms step_avg:2118.37ms step:395/3242 train_loss:4.0215 train_time:815568ms step_avg:2118.36ms step:396/3242 train_loss:3.8991 train_time:817691ms step_avg:2118.37ms step:397/3242 train_loss:3.6278 train_time:819815ms step_avg:2118.39ms step:398/3242 train_loss:3.9545 train_time:821933ms step_avg:2118.38ms step:399/3242 train_loss:3.7301 train_time:824048ms step_avg:2118.37ms step:400/3242 train_loss:3.7133 train_time:826173ms step_avg:2118.39ms step:401/3242 train_loss:3.8536 train_time:828283ms step_avg:2118.37ms step:402/3242 train_loss:3.7598 train_time:830410ms step_avg:2118.39ms step:403/3242 train_loss:3.6522 train_time:832523ms step_avg:2118.38ms step:404/3242 train_loss:3.9222 train_time:834644ms step_avg:2118.39ms step:405/3242 train_loss:4.0379 train_time:836760ms step_avg:2118.38ms step:406/3242 train_loss:3.9107 train_time:838877ms step_avg:2118.38ms step:407/3242 train_loss:3.8909 train_time:840998ms step_avg:2118.38ms step:408/3242 train_loss:3.8627 train_time:843117ms step_avg:2118.38ms step:409/3242 train_loss:3.7306 train_time:845228ms step_avg:2118.37ms step:410/3242 train_loss:3.8566 train_time:847352ms step_avg:2118.38ms step:411/3242 train_loss:3.8901 train_time:849465ms step_avg:2118.37ms step:412/3242 train_loss:4.0862 train_time:851590ms step_avg:2118.38ms step:413/3242 train_loss:3.8795 train_time:853711ms step_avg:2118.39ms step:414/3242 train_loss:3.8173 train_time:855827ms step_avg:2118.38ms step:415/3242 train_loss:3.7327 train_time:857944ms step_avg:2118.38ms step:416/3242 train_loss:4.0668 train_time:860061ms step_avg:2118.38ms step:417/3242 train_loss:4.0715 train_time:862185ms step_avg:2118.39ms step:418/3242 train_loss:3.8128 train_time:864298ms step_avg:2118.38ms step:419/3242 train_loss:3.9232 train_time:866416ms step_avg:2118.38ms step:420/3242 train_loss:4.2602 train_time:868535ms step_avg:2118.38ms step:421/3242 train_loss:3.8809 train_time:870656ms step_avg:2118.38ms step:422/3242 train_loss:4.0996 train_time:872771ms step_avg:2118.38ms step:423/3242 train_loss:3.6233 train_time:874895ms step_avg:2118.39ms step:424/3242 train_loss:3.7318 train_time:877006ms step_avg:2118.37ms step:425/3242 train_loss:3.5978 train_time:879128ms step_avg:2118.38ms step:426/3242 train_loss:3.9806 train_time:881248ms step_avg:2118.39ms step:427/3242 train_loss:3.8205 train_time:883369ms step_avg:2118.39ms step:428/3242 train_loss:4.0169 train_time:885478ms step_avg:2118.37ms step:429/3242 train_loss:3.8754 train_time:887600ms step_avg:2118.38ms step:430/3242 train_loss:3.6380 train_time:889715ms step_avg:2118.37ms step:431/3242 train_loss:3.5258 train_time:891833ms step_avg:2118.37ms step:432/3242 train_loss:4.0724 train_time:893951ms step_avg:2118.37ms step:433/3242 train_loss:3.9582 train_time:896077ms step_avg:2118.39ms step:434/3242 train_loss:3.8647 train_time:898196ms step_avg:2118.39ms step:435/3242 train_loss:3.6718 train_time:900311ms step_avg:2118.38ms step:436/3242 train_loss:3.9849 train_time:902429ms step_avg:2118.38ms step:437/3242 train_loss:3.8118 train_time:904553ms step_avg:2118.39ms step:438/3242 train_loss:3.8202 train_time:906676ms step_avg:2118.40ms step:439/3242 train_loss:3.9013 train_time:908790ms step_avg:2118.39ms step:440/3242 train_loss:3.7535 train_time:910911ms step_avg:2118.40ms step:441/3242 train_loss:3.7087 train_time:913033ms step_avg:2118.41ms step:442/3242 train_loss:3.9244 train_time:915144ms step_avg:2118.39ms step:443/3242 train_loss:3.6299 train_time:917266ms step_avg:2118.40ms step:444/3242 train_loss:3.7332 train_time:919385ms step_avg:2118.40ms step:445/3242 train_loss:4.0070 train_time:921502ms step_avg:2118.39ms step:446/3242 train_loss:3.7163 train_time:923626ms step_avg:2118.41ms step:447/3242 train_loss:3.9368 train_time:925748ms step_avg:2118.42ms step:448/3242 train_loss:4.1343 train_time:927867ms step_avg:2118.42ms step:449/3242 train_loss:3.8438 train_time:929985ms step_avg:2118.42ms step:450/3242 train_loss:4.0468 train_time:932098ms step_avg:2118.41ms step:451/3242 train_loss:3.8482 train_time:934223ms step_avg:2118.42ms step:452/3242 train_loss:3.9933 train_time:936336ms step_avg:2118.41ms step:453/3242 train_loss:4.6770 train_time:938459ms step_avg:2118.42ms step:454/3242 train_loss:3.4557 train_time:940577ms step_avg:2118.42ms step:455/3242 train_loss:3.8344 train_time:942690ms step_avg:2118.40ms step:456/3242 train_loss:3.7808 train_time:944811ms step_avg:2118.41ms step:457/3242 train_loss:3.7976 train_time:946933ms step_avg:2118.42ms step:458/3242 train_loss:3.9440 train_time:949056ms step_avg:2118.43ms step:459/3242 train_loss:4.1026 train_time:951176ms step_avg:2118.43ms step:460/3242 train_loss:3.4696 train_time:953295ms step_avg:2118.43ms step:461/3242 train_loss:3.7719 train_time:955417ms step_avg:2118.44ms step:462/3242 train_loss:4.0181 train_time:957537ms step_avg:2118.44ms step:463/3242 train_loss:3.8066 train_time:959652ms step_avg:2118.44ms step:464/3242 train_loss:3.8973 train_time:961776ms step_avg:2118.45ms step:465/3242 train_loss:3.9107 train_time:963891ms step_avg:2118.44ms step:466/3242 train_loss:4.1850 train_time:966007ms step_avg:2118.44ms step:467/3242 train_loss:3.7691 train_time:968133ms step_avg:2118.45ms step:468/3242 train_loss:3.8400 train_time:970251ms step_avg:2118.45ms step:469/3242 train_loss:4.4968 train_time:972364ms step_avg:2118.44ms step:470/3242 train_loss:3.9215 train_time:974486ms step_avg:2118.45ms step:471/3242 train_loss:3.7842 train_time:976603ms step_avg:2118.45ms step:472/3242 train_loss:3.9005 train_time:978721ms step_avg:2118.44ms step:473/3242 train_loss:3.8083 train_time:980853ms step_avg:2118.47ms step:474/3242 train_loss:3.8041 train_time:982972ms step_avg:2118.47ms step:475/3242 train_loss:3.8949 train_time:985088ms step_avg:2118.47ms step:476/3242 train_loss:3.8910 train_time:987208ms step_avg:2118.47ms step:477/3242 train_loss:4.1878 train_time:989337ms step_avg:2118.50ms step:478/3242 train_loss:3.7721 train_time:991453ms step_avg:2118.49ms step:479/3242 train_loss:3.7424 train_time:993568ms step_avg:2118.48ms step:480/3242 train_loss:3.7242 train_time:995693ms step_avg:2118.50ms step:481/3242 train_loss:3.6070 train_time:997815ms step_avg:2118.50ms step:482/3242 train_loss:4.0192 train_time:999935ms step_avg:2118.51ms step:483/3242 train_loss:3.8160 train_time:1002049ms step_avg:2118.50ms step:484/3242 train_loss:3.7993 train_time:1004166ms step_avg:2118.49ms step:485/3242 train_loss:3.7625 train_time:1006289ms step_avg:2118.50ms step:486/3242 train_loss:3.6103 train_time:1008410ms step_avg:2118.51ms step:487/3242 train_loss:3.7331 train_time:1010531ms step_avg:2118.51ms step:488/3242 train_loss:3.7760 train_time:1012647ms step_avg:2118.51ms step:489/3242 train_loss:3.8418 train_time:1014762ms step_avg:2118.50ms step:490/3242 train_loss:3.9339 train_time:1016882ms step_avg:2118.50ms step:491/3242 train_loss:3.7080 train_time:1019002ms step_avg:2118.51ms step:492/3242 train_loss:3.8125 train_time:1021121ms step_avg:2118.51ms step:493/3242 train_loss:3.8932 train_time:1023245ms step_avg:2118.52ms step:494/3242 train_loss:3.5008 train_time:1025365ms step_avg:2118.52ms step:495/3242 train_loss:4.0759 train_time:1027485ms step_avg:2118.53ms step:496/3242 train_loss:4.3423 train_time:1029609ms step_avg:2118.54ms step:497/3242 train_loss:3.8541 train_time:1031730ms step_avg:2118.54ms step:498/3242 train_loss:3.9692 train_time:1033843ms step_avg:2118.53ms step:499/3242 train_loss:3.8420 train_time:1035966ms step_avg:2118.54ms step:500/3242 train_loss:3.7026 train_time:1038080ms step_avg:2118.53ms step:500/3242 val_loss:3.8119 train_time:1038494ms step_avg:2119.37ms step:501/3242 train_loss:3.7384 train_time:1040200ms step_avg:2118.53ms step:502/3242 train_loss:3.7140 train_time:1042326ms step_avg:2118.55ms step:503/3242 train_loss:3.9052 train_time:1044446ms step_avg:2118.55ms step:504/3242 train_loss:3.6578 train_time:1046560ms step_avg:2118.54ms step:505/3242 train_loss:3.8164 train_time:1048679ms step_avg:2118.54ms step:506/3242 train_loss:3.7533 train_time:1050806ms step_avg:2118.56ms step:507/3242 train_loss:4.0485 train_time:1052924ms step_avg:2118.56ms step:508/3242 train_loss:4.5439 train_time:1055039ms step_avg:2118.55ms step:509/3242 train_loss:3.5567 train_time:1057156ms step_avg:2118.55ms step:510/3242 train_loss:3.9899 train_time:1059271ms step_avg:2118.54ms step:511/3242 train_loss:3.9503 train_time:1061394ms step_avg:2118.55ms step:512/3242 train_loss:3.8122 train_time:1063511ms step_avg:2118.55ms step:513/3242 train_loss:3.4330 train_time:1065633ms step_avg:2118.55ms step:514/3242 train_loss:3.6137 train_time:1067751ms step_avg:2118.55ms step:515/3242 train_loss:4.7736 train_time:1069869ms step_avg:2118.55ms step:516/3242 train_loss:3.9751 train_time:1071985ms step_avg:2118.55ms step:517/3242 train_loss:3.6027 train_time:1074104ms step_avg:2118.55ms step:518/3242 train_loss:3.6072 train_time:1076228ms step_avg:2118.56ms step:519/3242 train_loss:3.5344 train_time:1078346ms step_avg:2118.56ms step:520/3242 train_loss:3.9334 train_time:1080471ms step_avg:2118.57ms step:521/3242 train_loss:3.7828 train_time:1082586ms step_avg:2118.56ms step:522/3242 train_loss:3.4865 train_time:1084702ms step_avg:2118.56ms step:523/3242 train_loss:3.9828 train_time:1086825ms step_avg:2118.57ms step:524/3242 train_loss:3.6079 train_time:1088942ms step_avg:2118.56ms step:525/3242 train_loss:3.6735 train_time:1091063ms step_avg:2118.57ms step:526/3242 train_loss:4.1020 train_time:1093179ms step_avg:2118.56ms step:527/3242 train_loss:3.8160 train_time:1095300ms step_avg:2118.57ms step:528/3242 train_loss:3.6639 train_time:1097423ms step_avg:2118.58ms step:529/3242 train_loss:3.8906 train_time:1099542ms step_avg:2118.58ms step:530/3242 train_loss:3.7188 train_time:1101662ms step_avg:2118.58ms step:531/3242 train_loss:3.8101 train_time:1103783ms step_avg:2118.58ms step:532/3242 train_loss:3.7843 train_time:1105903ms step_avg:2118.59ms step:533/3242 train_loss:3.7617 train_time:1108019ms step_avg:2118.58ms step:534/3242 train_loss:3.8737 train_time:1110133ms step_avg:2118.57ms step:535/3242 train_loss:3.9150 train_time:1112262ms step_avg:2118.59ms step:536/3242 train_loss:3.8247 train_time:1114374ms step_avg:2118.58ms step:537/3242 train_loss:4.0140 train_time:1116495ms step_avg:2118.59ms step:538/3242 train_loss:3.7400 train_time:1118610ms step_avg:2118.58ms step:539/3242 train_loss:3.9439 train_time:1120736ms step_avg:2118.59ms step:540/3242 train_loss:3.8747 train_time:1122855ms step_avg:2118.59ms step:541/3242 train_loss:3.6532 train_time:1124970ms step_avg:2118.59ms step:542/3242 train_loss:3.9227 train_time:1127095ms step_avg:2118.60ms step:543/3242 train_loss:3.7919 train_time:1129216ms step_avg:2118.60ms step:544/3242 train_loss:3.7534 train_time:1131328ms step_avg:2118.59ms step:545/3242 train_loss:3.8738 train_time:1133447ms step_avg:2118.59ms step:546/3242 train_loss:3.8975 train_time:1135568ms step_avg:2118.60ms step:547/3242 train_loss:3.8268 train_time:1137681ms step_avg:2118.59ms step:548/3242 train_loss:4.1118 train_time:1139804ms step_avg:2118.59ms step:549/3242 train_loss:3.8821 train_time:1141926ms step_avg:2118.60ms step:550/3242 train_loss:3.9455 train_time:1144043ms step_avg:2118.60ms step:551/3242 train_loss:3.9233 train_time:1146169ms step_avg:2118.61ms step:552/3242 train_loss:3.8216 train_time:1148283ms step_avg:2118.60ms step:553/3242 train_loss:3.7325 train_time:1150403ms step_avg:2118.61ms step:554/3242 train_loss:3.6724 train_time:1152530ms step_avg:2118.62ms step:555/3242 train_loss:3.5968 train_time:1154641ms step_avg:2118.61ms step:556/3242 train_loss:4.2825 train_time:1156757ms step_avg:2118.60ms step:557/3242 train_loss:3.6901 train_time:1158881ms step_avg:2118.61ms step:558/3242 train_loss:3.6417 train_time:1161003ms step_avg:2118.62ms step:559/3242 train_loss:3.7899 train_time:1163121ms step_avg:2118.62ms step:560/3242 train_loss:3.6639 train_time:1165243ms step_avg:2118.62ms step:561/3242 train_loss:3.7117 train_time:1167354ms step_avg:2118.61ms step:562/3242 train_loss:3.7856 train_time:1169481ms step_avg:2118.63ms step:563/3242 train_loss:3.5296 train_time:1171595ms step_avg:2118.62ms step:564/3242 train_loss:3.9145 train_time:1173715ms step_avg:2118.62ms step:565/3242 train_loss:3.6417 train_time:1175838ms step_avg:2118.63ms step:566/3242 train_loss:3.8054 train_time:1177951ms step_avg:2118.62ms step:567/3242 train_loss:4.2167 train_time:1180074ms step_avg:2118.62ms step:568/3242 train_loss:3.5455 train_time:1182182ms step_avg:2118.61ms step:569/3242 train_loss:5.1336 train_time:1184296ms step_avg:2118.60ms step:570/3242 train_loss:3.7611 train_time:1186416ms step_avg:2118.60ms step:571/3242 train_loss:3.7616 train_time:1188535ms step_avg:2118.60ms step:572/3242 train_loss:3.9105 train_time:1190781ms step_avg:2118.83ms step:573/3242 train_loss:3.8324 train_time:1192905ms step_avg:2118.84ms step:574/3242 train_loss:3.8950 train_time:1195028ms step_avg:2118.84ms step:575/3242 train_loss:3.9357 train_time:1197148ms step_avg:2118.85ms step:576/3242 train_loss:3.8152 train_time:1199275ms step_avg:2118.86ms step:577/3242 train_loss:3.9320 train_time:1201393ms step_avg:2118.86ms step:578/3242 train_loss:3.8301 train_time:1203512ms step_avg:2118.86ms step:579/3242 train_loss:3.7461 train_time:1205633ms step_avg:2118.86ms step:580/3242 train_loss:3.8559 train_time:1207752ms step_avg:2118.86ms step:581/3242 train_loss:3.6740 train_time:1209866ms step_avg:2118.85ms step:582/3242 train_loss:3.7821 train_time:1211992ms step_avg:2118.87ms step:583/3242 train_loss:3.8755 train_time:1214110ms step_avg:2118.87ms step:584/3242 train_loss:3.7245 train_time:1216228ms step_avg:2118.86ms step:585/3242 train_loss:3.6337 train_time:1218349ms step_avg:2118.87ms step:586/3242 train_loss:4.2185 train_time:1220468ms step_avg:2118.87ms step:587/3242 train_loss:3.6264 train_time:1222581ms step_avg:2118.86ms step:588/3242 train_loss:3.9267 train_time:1224692ms step_avg:2118.84ms step:589/3242 train_loss:3.7613 train_time:1226821ms step_avg:2118.86ms step:590/3242 train_loss:3.8700 train_time:1228933ms step_avg:2118.85ms step:591/3242 train_loss:3.8172 train_time:1231045ms step_avg:2118.84ms step:592/3242 train_loss:3.4108 train_time:1233164ms step_avg:2118.84ms step:593/3242 train_loss:3.6593 train_time:1235283ms step_avg:2118.84ms step:594/3242 train_loss:3.7285 train_time:1237407ms step_avg:2118.85ms step:595/3242 train_loss:3.5505 train_time:1239521ms step_avg:2118.84ms step:596/3242 train_loss:4.1679 train_time:1241639ms step_avg:2118.84ms step:597/3242 train_loss:3.8371 train_time:1243751ms step_avg:2118.83ms step:598/3242 train_loss:3.6571 train_time:1245873ms step_avg:2118.83ms step:599/3242 train_loss:3.8592 train_time:1247990ms step_avg:2118.83ms step:600/3242 train_loss:3.5726 train_time:1250113ms step_avg:2118.84ms step:601/3242 train_loss:3.6495 train_time:1252237ms step_avg:2118.84ms step:602/3242 train_loss:3.7433 train_time:1254356ms step_avg:2118.84ms step:603/3242 train_loss:3.8725 train_time:1256474ms step_avg:2118.84ms step:604/3242 train_loss:3.8502 train_time:1258593ms step_avg:2118.84ms step:605/3242 train_loss:3.7869 train_time:1260719ms step_avg:2118.85ms step:606/3242 train_loss:3.9089 train_time:1262838ms step_avg:2118.85ms step:607/3242 train_loss:3.8233 train_time:1264960ms step_avg:2118.86ms step:608/3242 train_loss:4.1625 train_time:1267075ms step_avg:2118.85ms step:609/3242 train_loss:3.8682 train_time:1269196ms step_avg:2118.86ms step:610/3242 train_loss:3.7094 train_time:1271321ms step_avg:2118.87ms step:611/3242 train_loss:4.2520 train_time:1273449ms step_avg:2118.88ms step:612/3242 train_loss:3.6964 train_time:1275570ms step_avg:2118.89ms step:613/3242 train_loss:3.7720 train_time:1277689ms step_avg:2118.89ms step:614/3242 train_loss:4.0351 train_time:1279795ms step_avg:2118.87ms step:615/3242 train_loss:3.7720 train_time:1281915ms step_avg:2118.87ms step:616/3242 train_loss:3.9128 train_time:1284031ms step_avg:2118.86ms step:617/3242 train_loss:3.7813 train_time:1286158ms step_avg:2118.88ms step:618/3242 train_loss:3.5229 train_time:1288276ms step_avg:2118.87ms step:619/3242 train_loss:3.6977 train_time:1290392ms step_avg:2118.87ms step:620/3242 train_loss:3.8328 train_time:1292514ms step_avg:2118.88ms step:621/3242 train_loss:3.5818 train_time:1294630ms step_avg:2118.87ms step:622/3242 train_loss:3.6986 train_time:1296750ms step_avg:2118.87ms step:623/3242 train_loss:3.5433 train_time:1298870ms step_avg:2118.87ms step:624/3242 train_loss:3.5832 train_time:1300987ms step_avg:2118.87ms step:625/3242 train_loss:3.9648 train_time:1303107ms step_avg:2118.87ms step:625/3242 val_loss:3.7386 train_time:1303521ms step_avg:2119.55ms step:626/3242 train_loss:3.8766 train_time:1305237ms step_avg:2118.89ms step:627/3242 train_loss:4.0635 train_time:1307348ms step_avg:2118.88ms step:628/3242 train_loss:3.3542 train_time:1309471ms step_avg:2118.88ms step:629/3242 train_loss:3.6572 train_time:1311592ms step_avg:2118.89ms step:630/3242 train_loss:3.6358 train_time:1313706ms step_avg:2118.88ms step:631/3242 train_loss:3.6483 train_time:1315830ms step_avg:2118.89ms step:632/3242 train_loss:3.7638 train_time:1317947ms step_avg:2118.89ms step:633/3242 train_loss:3.8934 train_time:1320067ms step_avg:2118.89ms step:634/3242 train_loss:3.5919 train_time:1322188ms step_avg:2118.89ms step:635/3242 train_loss:3.7517 train_time:1324304ms step_avg:2118.89ms step:636/3242 train_loss:4.0722 train_time:1326423ms step_avg:2118.89ms step:637/3242 train_loss:3.7775 train_time:1328542ms step_avg:2118.89ms step:638/3242 train_loss:3.7313 train_time:1330660ms step_avg:2118.89ms step:639/3242 train_loss:3.9998 train_time:1332783ms step_avg:2118.89ms step:640/3242 train_loss:3.8059 train_time:1334901ms step_avg:2118.89ms step:641/3242 train_loss:3.7499 train_time:1337024ms step_avg:2118.90ms step:642/3242 train_loss:3.6923 train_time:1339142ms step_avg:2118.90ms step:643/3242 train_loss:3.6974 train_time:1341261ms step_avg:2118.90ms step:644/3242 train_loss:3.8138 train_time:1343378ms step_avg:2118.89ms step:645/3242 train_loss:3.4699 train_time:1345497ms step_avg:2118.89ms step:646/3242 train_loss:3.6405 train_time:1347616ms step_avg:2118.89ms step:647/3242 train_loss:3.5775 train_time:1349734ms step_avg:2118.89ms step:648/3242 train_loss:3.6689 train_time:1351856ms step_avg:2118.90ms step:649/3242 train_loss:3.8802 train_time:1353976ms step_avg:2118.90ms step:650/3242 train_loss:3.8721 train_time:1356094ms step_avg:2118.90ms step:651/3242 train_loss:3.7345 train_time:1358216ms step_avg:2118.90ms step:652/3242 train_loss:4.2421 train_time:1360336ms step_avg:2118.90ms step:653/3242 train_loss:3.7323 train_time:1362456ms step_avg:2118.90ms step:654/3242 train_loss:3.7623 train_time:1364569ms step_avg:2118.90ms step:655/3242 train_loss:3.3911 train_time:1366694ms step_avg:2118.91ms step:656/3242 train_loss:3.8237 train_time:1368810ms step_avg:2118.90ms step:657/3242 train_loss:3.6289 train_time:1370934ms step_avg:2118.91ms step:658/3242 train_loss:3.6276 train_time:1373048ms step_avg:2118.90ms step:659/3242 train_loss:3.8488 train_time:1375171ms step_avg:2118.91ms step:660/3242 train_loss:3.6827 train_time:1377285ms step_avg:2118.90ms step:661/3242 train_loss:3.7240 train_time:1379414ms step_avg:2118.91ms step:662/3242 train_loss:3.6563 train_time:1381524ms step_avg:2118.90ms step:663/3242 train_loss:3.5233 train_time:1383648ms step_avg:2118.91ms step:664/3242 train_loss:3.8173 train_time:1385766ms step_avg:2118.91ms step:665/3242 train_loss:3.9551 train_time:1387887ms step_avg:2118.91ms step:666/3242 train_loss:3.6096 train_time:1390003ms step_avg:2118.91ms step:667/3242 train_loss:4.0008 train_time:1392119ms step_avg:2118.90ms step:668/3242 train_loss:3.8018 train_time:1394239ms step_avg:2118.90ms step:669/3242 train_loss:3.7060 train_time:1396360ms step_avg:2118.91ms step:670/3242 train_loss:3.7076 train_time:1398476ms step_avg:2118.90ms step:671/3242 train_loss:3.4086 train_time:1400600ms step_avg:2118.91ms step:672/3242 train_loss:3.5696 train_time:1402721ms step_avg:2118.91ms step:673/3242 train_loss:3.6125 train_time:1404839ms step_avg:2118.91ms step:674/3242 train_loss:5.1121 train_time:1406961ms step_avg:2118.92ms step:675/3242 train_loss:3.8448 train_time:1409080ms step_avg:2118.92ms step:676/3242 train_loss:3.9923 train_time:1411196ms step_avg:2118.91ms step:677/3242 train_loss:3.6071 train_time:1413321ms step_avg:2118.92ms step:678/3242 train_loss:3.7271 train_time:1415440ms step_avg:2118.92ms step:679/3242 train_loss:3.6525 train_time:1417553ms step_avg:2118.91ms step:680/3242 train_loss:3.8132 train_time:1419680ms step_avg:2118.93ms step:681/3242 train_loss:3.7473 train_time:1421792ms step_avg:2118.92ms step:682/3242 train_loss:3.5674 train_time:1423912ms step_avg:2118.92ms step:683/3242 train_loss:3.7040 train_time:1426031ms step_avg:2118.92ms step:684/3242 train_loss:3.9680 train_time:1428152ms step_avg:2118.92ms step:685/3242 train_loss:3.7906 train_time:1430267ms step_avg:2118.91ms step:686/3242 train_loss:3.9428 train_time:1432390ms step_avg:2118.92ms step:687/3242 train_loss:3.7634 train_time:1434513ms step_avg:2118.93ms step:688/3242 train_loss:3.8702 train_time:1436633ms step_avg:2118.93ms step:689/3242 train_loss:3.3538 train_time:1438745ms step_avg:2118.92ms step:690/3242 train_loss:3.5795 train_time:1440865ms step_avg:2118.92ms step:691/3242 train_loss:3.8415 train_time:1442989ms step_avg:2118.93ms step:692/3242 train_loss:3.4846 train_time:1445105ms step_avg:2118.92ms step:693/3242 train_loss:3.7378 train_time:1447224ms step_avg:2118.92ms step:694/3242 train_loss:3.5844 train_time:1449348ms step_avg:2118.93ms step:695/3242 train_loss:3.4938 train_time:1451468ms step_avg:2118.93ms step:696/3242 train_loss:3.4708 train_time:1453583ms step_avg:2118.93ms step:697/3242 train_loss:3.6120 train_time:1455702ms step_avg:2118.93ms step:698/3242 train_loss:3.6661 train_time:1457817ms step_avg:2118.92ms step:699/3242 train_loss:3.6707 train_time:1459939ms step_avg:2118.93ms step:700/3242 train_loss:3.6038 train_time:1462060ms step_avg:2118.93ms step:701/3242 train_loss:3.6463 train_time:1464176ms step_avg:2118.92ms step:702/3242 train_loss:3.6292 train_time:1466297ms step_avg:2118.93ms step:703/3242 train_loss:3.6929 train_time:1468415ms step_avg:2118.93ms step:704/3242 train_loss:3.6039 train_time:1470541ms step_avg:2118.93ms step:705/3242 train_loss:3.9366 train_time:1472656ms step_avg:2118.93ms step:706/3242 train_loss:3.6484 train_time:1474780ms step_avg:2118.94ms step:707/3242 train_loss:3.7288 train_time:1476897ms step_avg:2118.93ms step:708/3242 train_loss:3.8192 train_time:1479015ms step_avg:2118.93ms step:709/3242 train_loss:3.6428 train_time:1481133ms step_avg:2118.93ms step:710/3242 train_loss:3.7615 train_time:1483255ms step_avg:2118.94ms step:711/3242 train_loss:3.4999 train_time:1485380ms step_avg:2118.94ms step:712/3242 train_loss:3.5517 train_time:1487491ms step_avg:2118.93ms step:713/3242 train_loss:3.9526 train_time:1489617ms step_avg:2118.94ms step:714/3242 train_loss:3.7035 train_time:1491735ms step_avg:2118.94ms step:715/3242 train_loss:3.7158 train_time:1493854ms step_avg:2118.94ms step:716/3242 train_loss:3.8474 train_time:1495975ms step_avg:2118.94ms step:717/3242 train_loss:3.8125 train_time:1498089ms step_avg:2118.94ms step:718/3242 train_loss:3.8668 train_time:1500205ms step_avg:2118.93ms step:719/3242 train_loss:3.5099 train_time:1502321ms step_avg:2118.93ms step:720/3242 train_loss:3.5830 train_time:1504439ms step_avg:2118.93ms step:721/3242 train_loss:4.5190 train_time:1506548ms step_avg:2118.91ms step:722/3242 train_loss:3.4701 train_time:1508666ms step_avg:2118.91ms step:723/3242 train_loss:3.9272 train_time:1510787ms step_avg:2118.92ms step:724/3242 train_loss:3.7091 train_time:1512906ms step_avg:2118.92ms step:725/3242 train_loss:3.6869 train_time:1515023ms step_avg:2118.91ms step:726/3242 train_loss:3.8555 train_time:1517144ms step_avg:2118.92ms step:727/3242 train_loss:3.6990 train_time:1519261ms step_avg:2118.91ms step:728/3242 train_loss:3.5758 train_time:1521383ms step_avg:2118.92ms step:729/3242 train_loss:3.7827 train_time:1523502ms step_avg:2118.92ms step:730/3242 train_loss:3.4947 train_time:1525620ms step_avg:2118.92ms step:731/3242 train_loss:3.7213 train_time:1527740ms step_avg:2118.92ms step:732/3242 train_loss:3.6748 train_time:1529862ms step_avg:2118.92ms step:733/3242 train_loss:3.7098 train_time:1531983ms step_avg:2118.92ms step:734/3242 train_loss:4.6685 train_time:1534097ms step_avg:2118.92ms step:735/3242 train_loss:3.4273 train_time:1536216ms step_avg:2118.92ms step:736/3242 train_loss:3.7451 train_time:1538344ms step_avg:2118.93ms step:737/3242 train_loss:3.9106 train_time:1540452ms step_avg:2118.92ms step:738/3242 train_loss:3.7309 train_time:1542572ms step_avg:2118.92ms step:739/3242 train_loss:3.6285 train_time:1544689ms step_avg:2118.92ms step:740/3242 train_loss:3.9105 train_time:1546802ms step_avg:2118.91ms step:741/3242 train_loss:3.8443 train_time:1548913ms step_avg:2118.90ms step:742/3242 train_loss:3.6588 train_time:1551038ms step_avg:2118.90ms step:743/3242 train_loss:3.9055 train_time:1553154ms step_avg:2118.90ms step:744/3242 train_loss:3.6383 train_time:1555275ms step_avg:2118.90ms step:745/3242 train_loss:4.0997 train_time:1557393ms step_avg:2118.90ms step:746/3242 train_loss:3.7361 train_time:1559513ms step_avg:2118.90ms step:747/3242 train_loss:3.7533 train_time:1561628ms step_avg:2118.90ms step:748/3242 train_loss:3.6236 train_time:1563763ms step_avg:2118.92ms step:749/3242 train_loss:3.6607 train_time:1565879ms step_avg:2118.92ms step:750/3242 train_loss:3.6014 train_time:1567990ms step_avg:2118.91ms step:750/3242 val_loss:3.6826 train_time:1568401ms step_avg:2119.46ms step:751/3242 train_loss:3.7381 train_time:1570113ms step_avg:2118.91ms step:752/3242 train_loss:3.6551 train_time:1572239ms step_avg:2118.92ms step:753/3242 train_loss:3.6096 train_time:1574353ms step_avg:2118.91ms step:754/3242 train_loss:3.5306 train_time:1576476ms step_avg:2118.92ms step:755/3242 train_loss:3.9260 train_time:1578597ms step_avg:2118.92ms step:756/3242 train_loss:3.9206 train_time:1580714ms step_avg:2118.92ms step:757/3242 train_loss:3.4670 train_time:1582840ms step_avg:2118.93ms step:758/3242 train_loss:3.7984 train_time:1584960ms step_avg:2118.93ms step:759/3242 train_loss:3.8210 train_time:1587076ms step_avg:2118.93ms step:760/3242 train_loss:3.6895 train_time:1589189ms step_avg:2118.92ms step:761/3242 train_loss:3.5270 train_time:1591317ms step_avg:2118.93ms step:762/3242 train_loss:3.8252 train_time:1593568ms step_avg:2119.11ms step:763/3242 train_loss:3.4613 train_time:1595686ms step_avg:2119.10ms step:764/3242 train_loss:3.6317 train_time:1597810ms step_avg:2119.11ms step:765/3242 train_loss:3.6732 train_time:1599937ms step_avg:2119.12ms step:766/3242 train_loss:3.5302 train_time:1602052ms step_avg:2119.12ms step:767/3242 train_loss:4.0467 train_time:1604170ms step_avg:2119.12ms step:768/3242 train_loss:3.5627 train_time:1606288ms step_avg:2119.11ms step:769/3242 train_loss:3.6076 train_time:1608415ms step_avg:2119.12ms step:770/3242 train_loss:3.7531 train_time:1610534ms step_avg:2119.12ms step:771/3242 train_loss:5.9626 train_time:1612648ms step_avg:2119.12ms step:772/3242 train_loss:3.8307 train_time:1614767ms step_avg:2119.12ms step:773/3242 train_loss:3.7725 train_time:1616884ms step_avg:2119.11ms step:774/3242 train_loss:3.6626 train_time:1619005ms step_avg:2119.12ms step:775/3242 train_loss:3.8145 train_time:1621127ms step_avg:2119.12ms step:776/3242 train_loss:3.4875 train_time:1623251ms step_avg:2119.13ms step:777/3242 train_loss:3.7407 train_time:1625365ms step_avg:2119.12ms step:778/3242 train_loss:3.6795 train_time:1627485ms step_avg:2119.12ms step:779/3242 train_loss:3.7152 train_time:1629594ms step_avg:2119.11ms step:780/3242 train_loss:3.5284 train_time:1631713ms step_avg:2119.11ms step:781/3242 train_loss:3.7206 train_time:1633834ms step_avg:2119.11ms step:782/3242 train_loss:3.5649 train_time:1635954ms step_avg:2119.11ms step:783/3242 train_loss:3.5517 train_time:1638072ms step_avg:2119.11ms step:784/3242 train_loss:3.7292 train_time:1640196ms step_avg:2119.12ms step:785/3242 train_loss:3.8327 train_time:1642318ms step_avg:2119.12ms step:786/3242 train_loss:3.5584 train_time:1644437ms step_avg:2119.12ms step:787/3242 train_loss:3.5813 train_time:1646555ms step_avg:2119.12ms step:788/3242 train_loss:3.9217 train_time:1648671ms step_avg:2119.11ms step:789/3242 train_loss:3.7451 train_time:1650798ms step_avg:2119.12ms step:790/3242 train_loss:3.7431 train_time:1652912ms step_avg:2119.12ms step:791/3242 train_loss:3.7097 train_time:1655030ms step_avg:2119.12ms step:792/3242 train_loss:3.7566 train_time:1657144ms step_avg:2119.11ms step:793/3242 train_loss:3.7214 train_time:1659269ms step_avg:2119.12ms step:794/3242 train_loss:3.7614 train_time:1661387ms step_avg:2119.12ms step:795/3242 train_loss:3.9856 train_time:1663504ms step_avg:2119.11ms step:796/3242 train_loss:3.7015 train_time:1665623ms step_avg:2119.11ms step:797/3242 train_loss:3.7368 train_time:1667746ms step_avg:2119.12ms step:798/3242 train_loss:3.2765 train_time:1669866ms step_avg:2119.12ms step:799/3242 train_loss:3.5997 train_time:1671982ms step_avg:2119.12ms step:800/3242 train_loss:3.4316 train_time:1674105ms step_avg:2119.12ms step:801/3242 train_loss:3.6414 train_time:1676228ms step_avg:2119.13ms step:802/3242 train_loss:3.6790 train_time:1678350ms step_avg:2119.13ms step:803/3242 train_loss:3.7899 train_time:1680473ms step_avg:2119.13ms step:804/3242 train_loss:3.4976 train_time:1682588ms step_avg:2119.13ms step:805/3242 train_loss:3.6011 train_time:1684705ms step_avg:2119.13ms step:806/3242 train_loss:3.3165 train_time:1686820ms step_avg:2119.12ms step:807/3242 train_loss:3.6856 train_time:1688939ms step_avg:2119.12ms step:808/3242 train_loss:3.9066 train_time:1691063ms step_avg:2119.13ms step:809/3242 train_loss:3.7004 train_time:1693176ms step_avg:2119.12ms step:810/3242 train_loss:4.1148 train_time:1695295ms step_avg:2119.12ms step:811/3242 train_loss:3.6744 train_time:1697421ms step_avg:2119.13ms step:812/3242 train_loss:3.5135 train_time:1699537ms step_avg:2119.12ms step:813/3242 train_loss:3.9939 train_time:1701653ms step_avg:2119.12ms step:814/3242 train_loss:3.6150 train_time:1703781ms step_avg:2119.13ms step:815/3242 train_loss:3.5419 train_time:1705901ms step_avg:2119.13ms step:816/3242 train_loss:3.9362 train_time:1708025ms step_avg:2119.14ms step:817/3242 train_loss:3.7196 train_time:1710140ms step_avg:2119.13ms step:818/3242 train_loss:3.6351 train_time:1712254ms step_avg:2119.13ms step:819/3242 train_loss:3.5823 train_time:1714376ms step_avg:2119.13ms step:820/3242 train_loss:3.6506 train_time:1716492ms step_avg:2119.13ms step:821/3242 train_loss:3.6776 train_time:1718614ms step_avg:2119.13ms step:822/3242 train_loss:3.7419 train_time:1720738ms step_avg:2119.14ms step:823/3242 train_loss:3.8604 train_time:1722855ms step_avg:2119.13ms step:824/3242 train_loss:3.3909 train_time:1724968ms step_avg:2119.13ms step:825/3242 train_loss:3.9530 train_time:1727088ms step_avg:2119.13ms step:826/3242 train_loss:3.7382 train_time:1729207ms step_avg:2119.13ms step:827/3242 train_loss:3.4732 train_time:1731331ms step_avg:2119.13ms step:828/3242 train_loss:3.6096 train_time:1733453ms step_avg:2119.14ms step:829/3242 train_loss:3.6116 train_time:1735572ms step_avg:2119.14ms step:830/3242 train_loss:3.6287 train_time:1737684ms step_avg:2119.13ms step:831/3242 train_loss:3.5840 train_time:1739802ms step_avg:2119.13ms step:832/3242 train_loss:3.9486 train_time:1741933ms step_avg:2119.14ms step:833/3242 train_loss:3.7739 train_time:1744047ms step_avg:2119.13ms step:834/3242 train_loss:3.7480 train_time:1746166ms step_avg:2119.13ms step:835/3242 train_loss:3.4619 train_time:1748290ms step_avg:2119.14ms step:836/3242 train_loss:3.9138 train_time:1750405ms step_avg:2119.13ms step:837/3242 train_loss:3.5909 train_time:1752529ms step_avg:2119.14ms step:838/3242 train_loss:3.4625 train_time:1754647ms step_avg:2119.14ms step:839/3242 train_loss:3.5817 train_time:1756752ms step_avg:2119.12ms step:840/3242 train_loss:3.8352 train_time:1758876ms step_avg:2119.13ms step:841/3242 train_loss:3.6303 train_time:1760995ms step_avg:2119.13ms step:842/3242 train_loss:3.4852 train_time:1763113ms step_avg:2119.13ms step:843/3242 train_loss:3.6157 train_time:1765236ms step_avg:2119.13ms step:844/3242 train_loss:3.6932 train_time:1767355ms step_avg:2119.13ms step:845/3242 train_loss:3.6260 train_time:1769475ms step_avg:2119.13ms step:846/3242 train_loss:3.7208 train_time:1771594ms step_avg:2119.13ms step:847/3242 train_loss:3.8745 train_time:1773707ms step_avg:2119.12ms step:848/3242 train_loss:3.4237 train_time:1775830ms step_avg:2119.13ms step:849/3242 train_loss:3.3852 train_time:1777951ms step_avg:2119.13ms step:850/3242 train_loss:3.6767 train_time:1780065ms step_avg:2119.13ms step:851/3242 train_loss:3.6376 train_time:1782187ms step_avg:2119.13ms step:852/3242 train_loss:3.3484 train_time:1784313ms step_avg:2119.14ms step:853/3242 train_loss:3.2029 train_time:1786433ms step_avg:2119.14ms step:854/3242 train_loss:3.7095 train_time:1788552ms step_avg:2119.14ms step:855/3242 train_loss:3.5882 train_time:1790669ms step_avg:2119.14ms step:856/3242 train_loss:3.6204 train_time:1792793ms step_avg:2119.14ms step:857/3242 train_loss:3.8076 train_time:1794914ms step_avg:2119.14ms step:858/3242 train_loss:3.7177 train_time:1797029ms step_avg:2119.14ms step:859/3242 train_loss:3.1544 train_time:1799144ms step_avg:2119.13ms step:860/3242 train_loss:3.6541 train_time:1801266ms step_avg:2119.14ms step:861/3242 train_loss:4.0811 train_time:1803388ms step_avg:2119.14ms step:862/3242 train_loss:3.6032 train_time:1805505ms step_avg:2119.14ms step:863/3242 train_loss:3.5280 train_time:1807626ms step_avg:2119.14ms step:864/3242 train_loss:3.9452 train_time:1809742ms step_avg:2119.14ms step:865/3242 train_loss:3.7644 train_time:1811856ms step_avg:2119.13ms step:866/3242 train_loss:3.7481 train_time:1813982ms step_avg:2119.14ms step:867/3242 train_loss:3.5593 train_time:1816098ms step_avg:2119.13ms step:868/3242 train_loss:3.7106 train_time:1818218ms step_avg:2119.14ms step:869/3242 train_loss:3.5596 train_time:1820342ms step_avg:2119.14ms step:870/3242 train_loss:3.5849 train_time:1822458ms step_avg:2119.14ms step:871/3242 train_loss:3.8406 train_time:1824581ms step_avg:2119.14ms step:872/3242 train_loss:3.5656 train_time:1826698ms step_avg:2119.14ms step:873/3242 train_loss:3.6454 train_time:1828812ms step_avg:2119.13ms step:874/3242 train_loss:3.6456 train_time:1830933ms step_avg:2119.14ms step:875/3242 train_loss:3.6999 train_time:1833054ms step_avg:2119.14ms step:875/3242 val_loss:3.6368 train_time:1833464ms step_avg:2119.61ms step:876/3242 train_loss:3.8489 train_time:1835179ms step_avg:2119.14ms step:877/3242 train_loss:3.4515 train_time:1837307ms step_avg:2119.15ms step:878/3242 train_loss:3.7479 train_time:1839423ms step_avg:2119.15ms step:879/3242 train_loss:3.5248 train_time:1841547ms step_avg:2119.16ms step:880/3242 train_loss:4.3258 train_time:1843664ms step_avg:2119.15ms step:881/3242 train_loss:3.7996 train_time:1845787ms step_avg:2119.16ms step:882/3242 train_loss:3.2603 train_time:1847904ms step_avg:2119.16ms step:883/3242 train_loss:3.7454 train_time:1850032ms step_avg:2119.17ms step:884/3242 train_loss:3.5391 train_time:1852144ms step_avg:2119.16ms step:885/3242 train_loss:3.6985 train_time:1854267ms step_avg:2119.16ms step:886/3242 train_loss:3.7800 train_time:1856380ms step_avg:2119.16ms step:887/3242 train_loss:3.6781 train_time:1858495ms step_avg:2119.15ms step:888/3242 train_loss:3.7759 train_time:1860611ms step_avg:2119.15ms step:889/3242 train_loss:3.7485 train_time:1862737ms step_avg:2119.15ms step:890/3242 train_loss:3.7977 train_time:1864851ms step_avg:2119.15ms step:891/3242 train_loss:3.5247 train_time:1866961ms step_avg:2119.14ms step:892/3242 train_loss:3.7121 train_time:1869083ms step_avg:2119.14ms step:893/3242 train_loss:3.8060 train_time:1871206ms step_avg:2119.15ms step:894/3242 train_loss:3.4144 train_time:1873321ms step_avg:2119.14ms step:895/3242 train_loss:3.5476 train_time:1875438ms step_avg:2119.14ms step:896/3242 train_loss:3.2692 train_time:1877562ms step_avg:2119.14ms step:897/3242 train_loss:3.5302 train_time:1879673ms step_avg:2119.14ms step:898/3242 train_loss:3.7952 train_time:1881794ms step_avg:2119.14ms step:899/3242 train_loss:3.6947 train_time:1883918ms step_avg:2119.14ms step:900/3242 train_loss:3.6534 train_time:1886040ms step_avg:2119.15ms step:901/3242 train_loss:3.6628 train_time:1888163ms step_avg:2119.15ms step:902/3242 train_loss:3.5626 train_time:1890279ms step_avg:2119.15ms step:903/3242 train_loss:3.4684 train_time:1892391ms step_avg:2119.14ms step:904/3242 train_loss:3.6092 train_time:1894510ms step_avg:2119.14ms step:905/3242 train_loss:3.7492 train_time:1896630ms step_avg:2119.14ms step:906/3242 train_loss:3.8053 train_time:1898753ms step_avg:2119.14ms step:907/3242 train_loss:3.6172 train_time:1900872ms step_avg:2119.14ms step:908/3242 train_loss:3.6221 train_time:1902989ms step_avg:2119.14ms step:909/3242 train_loss:3.8157 train_time:1905113ms step_avg:2119.15ms step:910/3242 train_loss:4.0093 train_time:1907237ms step_avg:2119.15ms step:911/3242 train_loss:4.0682 train_time:1909351ms step_avg:2119.15ms step:912/3242 train_loss:3.3300 train_time:1911469ms step_avg:2119.14ms step:913/3242 train_loss:3.6845 train_time:1913588ms step_avg:2119.15ms step:914/3242 train_loss:3.7294 train_time:1915709ms step_avg:2119.15ms step:915/3242 train_loss:3.7000 train_time:1917825ms step_avg:2119.14ms step:916/3242 train_loss:3.7761 train_time:1919947ms step_avg:2119.15ms step:917/3242 train_loss:3.7747 train_time:1922058ms step_avg:2119.14ms step:918/3242 train_loss:3.5547 train_time:1924183ms step_avg:2119.14ms step:919/3242 train_loss:3.8700 train_time:1926298ms step_avg:2119.14ms step:920/3242 train_loss:3.5460 train_time:1928407ms step_avg:2119.13ms step:921/3242 train_loss:3.5663 train_time:1930535ms step_avg:2119.14ms step:922/3242 train_loss:3.5128 train_time:1932647ms step_avg:2119.13ms step:923/3242 train_loss:3.5409 train_time:1934764ms step_avg:2119.13ms step:924/3242 train_loss:4.0640 train_time:1936883ms step_avg:2119.13ms step:925/3242 train_loss:3.4224 train_time:1939001ms step_avg:2119.13ms step:926/3242 train_loss:3.7857 train_time:1941121ms step_avg:2119.13ms step:927/3242 train_loss:3.6433 train_time:1943238ms step_avg:2119.13ms step:928/3242 train_loss:3.7448 train_time:1945357ms step_avg:2119.13ms step:929/3242 train_loss:3.6791 train_time:1947470ms step_avg:2119.12ms step:930/3242 train_loss:3.8031 train_time:1949596ms step_avg:2119.13ms step:931/3242 train_loss:3.5792 train_time:1951718ms step_avg:2119.13ms step:932/3242 train_loss:3.3525 train_time:1953834ms step_avg:2119.13ms step:933/3242 train_loss:3.7457 train_time:1955962ms step_avg:2119.14ms step:934/3242 train_loss:3.9442 train_time:1958080ms step_avg:2119.13ms step:935/3242 train_loss:3.8255 train_time:1960202ms step_avg:2119.14ms step:936/3242 train_loss:3.6037 train_time:1962325ms step_avg:2119.14ms step:937/3242 train_loss:3.5558 train_time:1964440ms step_avg:2119.14ms step:938/3242 train_loss:3.7448 train_time:1966563ms step_avg:2119.14ms step:939/3242 train_loss:3.0129 train_time:1968671ms step_avg:2119.13ms step:940/3242 train_loss:3.7738 train_time:1970791ms step_avg:2119.13ms step:941/3242 train_loss:3.5820 train_time:1972913ms step_avg:2119.13ms step:942/3242 train_loss:3.5165 train_time:1975029ms step_avg:2119.13ms step:943/3242 train_loss:3.8134 train_time:1977151ms step_avg:2119.13ms step:944/3242 train_loss:3.4498 train_time:1979272ms step_avg:2119.14ms step:945/3242 train_loss:3.4466 train_time:1981389ms step_avg:2119.13ms step:946/3242 train_loss:3.7803 train_time:1983514ms step_avg:2119.14ms step:947/3242 train_loss:3.7372 train_time:1985631ms step_avg:2119.14ms step:948/3242 train_loss:3.6639 train_time:1987744ms step_avg:2119.13ms step:949/3242 train_loss:3.4474 train_time:1989863ms step_avg:2119.13ms step:950/3242 train_loss:3.4633 train_time:1991993ms step_avg:2119.14ms step:951/3242 train_loss:3.3629 train_time:1994116ms step_avg:2119.15ms step:952/3242 train_loss:3.8853 train_time:1996230ms step_avg:2119.14ms step:953/3242 train_loss:3.6088 train_time:1998480ms step_avg:2119.28ms step:954/3242 train_loss:3.5677 train_time:2000598ms step_avg:2119.28ms step:955/3242 train_loss:3.6925 train_time:2002727ms step_avg:2119.29ms step:956/3242 train_loss:3.6513 train_time:2004838ms step_avg:2119.28ms step:957/3242 train_loss:3.6888 train_time:2006961ms step_avg:2119.28ms step:958/3242 train_loss:3.4481 train_time:2009078ms step_avg:2119.28ms step:959/3242 train_loss:3.5983 train_time:2011194ms step_avg:2119.28ms step:960/3242 train_loss:3.6472 train_time:2013317ms step_avg:2119.28ms step:961/3242 train_loss:3.8033 train_time:2015438ms step_avg:2119.28ms step:962/3242 train_loss:3.4564 train_time:2017560ms step_avg:2119.29ms step:963/3242 train_loss:3.8745 train_time:2019675ms step_avg:2119.28ms step:964/3242 train_loss:3.8707 train_time:2021792ms step_avg:2119.28ms step:965/3242 train_loss:3.5790 train_time:2023911ms step_avg:2119.28ms step:966/3242 train_loss:3.2378 train_time:2026031ms step_avg:2119.28ms step:967/3242 train_loss:3.4905 train_time:2028151ms step_avg:2119.28ms step:968/3242 train_loss:3.9701 train_time:2030264ms step_avg:2119.27ms step:969/3242 train_loss:3.6169 train_time:2032385ms step_avg:2119.28ms step:970/3242 train_loss:3.6093 train_time:2034506ms step_avg:2119.28ms step:971/3242 train_loss:3.6253 train_time:2036631ms step_avg:2119.28ms step:972/3242 train_loss:3.5647 train_time:2038748ms step_avg:2119.28ms step:973/3242 train_loss:3.5781 train_time:2040864ms step_avg:2119.28ms step:974/3242 train_loss:3.5818 train_time:2042982ms step_avg:2119.28ms step:975/3242 train_loss:3.7726 train_time:2045106ms step_avg:2119.28ms step:976/3242 train_loss:3.7153 train_time:2047223ms step_avg:2119.28ms step:977/3242 train_loss:3.7021 train_time:2049351ms step_avg:2119.29ms step:978/3242 train_loss:3.5797 train_time:2051469ms step_avg:2119.29ms step:979/3242 train_loss:3.9803 train_time:2053593ms step_avg:2119.29ms step:980/3242 train_loss:3.3476 train_time:2055708ms step_avg:2119.29ms step:981/3242 train_loss:3.8635 train_time:2057827ms step_avg:2119.29ms step:982/3242 train_loss:3.6590 train_time:2059960ms step_avg:2119.30ms step:983/3242 train_loss:3.6624 train_time:2062075ms step_avg:2119.30ms step:984/3242 train_loss:3.5812 train_time:2064188ms step_avg:2119.29ms step:985/3242 train_loss:3.6035 train_time:2066314ms step_avg:2119.30ms step:986/3242 train_loss:3.6834 train_time:2068422ms step_avg:2119.28ms step:987/3242 train_loss:3.8319 train_time:2070536ms step_avg:2119.28ms step:988/3242 train_loss:3.5075 train_time:2072656ms step_avg:2119.28ms step:989/3242 train_loss:3.6882 train_time:2074776ms step_avg:2119.28ms step:990/3242 train_loss:3.5506 train_time:2076903ms step_avg:2119.29ms step:991/3242 train_loss:3.6064 train_time:2079027ms step_avg:2119.29ms step:992/3242 train_loss:3.7008 train_time:2081146ms step_avg:2119.29ms step:993/3242 train_loss:3.4962 train_time:2083264ms step_avg:2119.29ms step:994/3242 train_loss:3.7199 train_time:2085378ms step_avg:2119.29ms step:995/3242 train_loss:3.5677 train_time:2087503ms step_avg:2119.29ms step:996/3242 train_loss:3.7368 train_time:2089621ms step_avg:2119.29ms step:997/3242 train_loss:3.5208 train_time:2091738ms step_avg:2119.29ms step:998/3242 train_loss:3.5799 train_time:2093847ms step_avg:2119.28ms step:999/3242 train_loss:3.9395 train_time:2095967ms step_avg:2119.28ms step:1000/3242 train_loss:3.5988 train_time:2098088ms step_avg:2119.28ms step:1000/3242 val_loss:3.5980 train_time:2098502ms step_avg:2119.70ms step:1001/3242 train_loss:3.8411 train_time:2100215ms step_avg:2119.29ms step:1002/3242 train_loss:3.5147 train_time:2102334ms step_avg:2119.29ms step:1003/3242 train_loss:3.3870 train_time:2104460ms step_avg:2119.29ms step:1004/3242 train_loss:3.5166 train_time:2106578ms step_avg:2119.29ms step:1005/3242 train_loss:3.9956 train_time:2108693ms step_avg:2119.29ms step:1006/3242 train_loss:3.4823 train_time:2110815ms step_avg:2119.29ms step:1007/3242 train_loss:3.5388 train_time:2112936ms step_avg:2119.29ms step:1008/3242 train_loss:3.4550 train_time:2115055ms step_avg:2119.29ms step:1009/3242 train_loss:3.5324 train_time:2117171ms step_avg:2119.29ms step:1010/3242 train_loss:3.8212 train_time:2119297ms step_avg:2119.30ms step:1011/3242 train_loss:3.5941 train_time:2121414ms step_avg:2119.30ms step:1012/3242 train_loss:3.8212 train_time:2123532ms step_avg:2119.29ms step:1013/3242 train_loss:3.6926 train_time:2125648ms step_avg:2119.29ms step:1014/3242 train_loss:3.4950 train_time:2127769ms step_avg:2119.29ms step:1015/3242 train_loss:3.6326 train_time:2129892ms step_avg:2119.30ms step:1016/3242 train_loss:3.6714 train_time:2132002ms step_avg:2119.29ms step:1017/3242 train_loss:3.3955 train_time:2134128ms step_avg:2119.29ms step:1018/3242 train_loss:3.5802 train_time:2136246ms step_avg:2119.29ms step:1019/3242 train_loss:3.5629 train_time:2138367ms step_avg:2119.29ms step:1020/3242 train_loss:3.4960 train_time:2140502ms step_avg:2119.31ms step:1021/3242 train_loss:3.4992 train_time:2142615ms step_avg:2119.30ms step:1022/3242 train_loss:3.4279 train_time:2144736ms step_avg:2119.30ms step:1023/3242 train_loss:3.2140 train_time:2146853ms step_avg:2119.30ms step:1024/3242 train_loss:3.7650 train_time:2148969ms step_avg:2119.30ms step:1025/3242 train_loss:3.4986 train_time:2151090ms step_avg:2119.30ms step:1026/3242 train_loss:3.7588 train_time:2153210ms step_avg:2119.30ms step:1027/3242 train_loss:3.5994 train_time:2155332ms step_avg:2119.30ms step:1028/3242 train_loss:3.4610 train_time:2157441ms step_avg:2119.29ms step:1029/3242 train_loss:3.6863 train_time:2159569ms step_avg:2119.30ms step:1030/3242 train_loss:3.5643 train_time:2161685ms step_avg:2119.30ms step:1031/3242 train_loss:3.4863 train_time:2163804ms step_avg:2119.30ms step:1032/3242 train_loss:3.5189 train_time:2165923ms step_avg:2119.30ms step:1033/3242 train_loss:3.4365 train_time:2168048ms step_avg:2119.30ms step:1034/3242 train_loss:3.6910 train_time:2170165ms step_avg:2119.30ms step:1035/3242 train_loss:3.4874 train_time:2172280ms step_avg:2119.30ms step:1036/3242 train_loss:3.5787 train_time:2174401ms step_avg:2119.30ms step:1037/3242 train_loss:3.6124 train_time:2176526ms step_avg:2119.30ms step:1038/3242 train_loss:3.8208 train_time:2178644ms step_avg:2119.30ms step:1039/3242 train_loss:4.6145 train_time:2180755ms step_avg:2119.30ms step:1040/3242 train_loss:3.5697 train_time:2182877ms step_avg:2119.30ms step:1041/3242 train_loss:3.5242 train_time:2184997ms step_avg:2119.30ms step:1042/3242 train_loss:3.5934 train_time:2187121ms step_avg:2119.30ms step:1043/3242 train_loss:3.4588 train_time:2189241ms step_avg:2119.30ms step:1044/3242 train_loss:3.4520 train_time:2191359ms step_avg:2119.30ms step:1045/3242 train_loss:3.4741 train_time:2193475ms step_avg:2119.30ms step:1046/3242 train_loss:3.5180 train_time:2195593ms step_avg:2119.30ms step:1047/3242 train_loss:3.8758 train_time:2197713ms step_avg:2119.30ms step:1048/3242 train_loss:3.7143 train_time:2199839ms step_avg:2119.31ms step:1049/3242 train_loss:4.0948 train_time:2201952ms step_avg:2119.30ms step:1050/3242 train_loss:3.6880 train_time:2204073ms step_avg:2119.30ms step:1051/3242 train_loss:3.6486 train_time:2206190ms step_avg:2119.30ms step:1052/3242 train_loss:3.4506 train_time:2208310ms step_avg:2119.30ms step:1053/3242 train_loss:3.6238 train_time:2210428ms step_avg:2119.30ms step:1054/3242 train_loss:3.4204 train_time:2212554ms step_avg:2119.30ms step:1055/3242 train_loss:3.6798 train_time:2214671ms step_avg:2119.30ms step:1056/3242 train_loss:3.4259 train_time:2216790ms step_avg:2119.30ms step:1057/3242 train_loss:3.5434 train_time:2218900ms step_avg:2119.29ms step:1058/3242 train_loss:3.8643 train_time:2221018ms step_avg:2119.29ms step:1059/3242 train_loss:3.6451 train_time:2223137ms step_avg:2119.29ms step:1060/3242 train_loss:3.3732 train_time:2225260ms step_avg:2119.30ms step:1061/3242 train_loss:3.6146 train_time:2227380ms step_avg:2119.30ms step:1062/3242 train_loss:3.6235 train_time:2229495ms step_avg:2119.29ms step:1063/3242 train_loss:3.4575 train_time:2231619ms step_avg:2119.30ms step:1064/3242 train_loss:3.4760 train_time:2233731ms step_avg:2119.29ms step:1065/3242 train_loss:3.5589 train_time:2235853ms step_avg:2119.29ms step:1066/3242 train_loss:3.4660 train_time:2237976ms step_avg:2119.30ms step:1067/3242 train_loss:3.4169 train_time:2240101ms step_avg:2119.30ms step:1068/3242 train_loss:3.6305 train_time:2242216ms step_avg:2119.30ms step:1069/3242 train_loss:3.2561 train_time:2244337ms step_avg:2119.30ms step:1070/3242 train_loss:3.5729 train_time:2246462ms step_avg:2119.30ms step:1071/3242 train_loss:3.0296 train_time:2248579ms step_avg:2119.30ms step:1072/3242 train_loss:3.8631 train_time:2250696ms step_avg:2119.30ms step:1073/3242 train_loss:3.8382 train_time:2252821ms step_avg:2119.30ms step:1074/3242 train_loss:3.5071 train_time:2254934ms step_avg:2119.30ms step:1075/3242 train_loss:3.8450 train_time:2257057ms step_avg:2119.30ms step:1076/3242 train_loss:3.6455 train_time:2259177ms step_avg:2119.30ms step:1077/3242 train_loss:3.5085 train_time:2261299ms step_avg:2119.31ms step:1078/3242 train_loss:3.6840 train_time:2263416ms step_avg:2119.30ms step:1079/3242 train_loss:3.6105 train_time:2265536ms step_avg:2119.30ms step:1080/3242 train_loss:3.3952 train_time:2267649ms step_avg:2119.30ms step:1081/3242 train_loss:3.4100 train_time:2269770ms step_avg:2119.30ms step:1082/3242 train_loss:3.7363 train_time:2271888ms step_avg:2119.30ms step:1083/3242 train_loss:3.5851 train_time:2274014ms step_avg:2119.31ms step:1084/3242 train_loss:3.7832 train_time:2276130ms step_avg:2119.30ms step:1085/3242 train_loss:3.8225 train_time:2278247ms step_avg:2119.30ms step:1086/3242 train_loss:3.5429 train_time:2280367ms step_avg:2119.30ms step:1087/3242 train_loss:3.6127 train_time:2282485ms step_avg:2119.30ms step:1088/3242 train_loss:3.3568 train_time:2284606ms step_avg:2119.30ms step:1089/3242 train_loss:3.5967 train_time:2286727ms step_avg:2119.30ms step:1090/3242 train_loss:3.4874 train_time:2288849ms step_avg:2119.30ms step:1091/3242 train_loss:3.5156 train_time:2290964ms step_avg:2119.30ms step:1092/3242 train_loss:3.8633 train_time:2293085ms step_avg:2119.30ms step:1093/3242 train_loss:3.4986 train_time:2295203ms step_avg:2119.30ms step:1094/3242 train_loss:3.5968 train_time:2297322ms step_avg:2119.30ms step:1095/3242 train_loss:3.4326 train_time:2299442ms step_avg:2119.30ms step:1096/3242 train_loss:3.7871 train_time:2301559ms step_avg:2119.30ms step:1097/3242 train_loss:3.5727 train_time:2303681ms step_avg:2119.30ms step:1098/3242 train_loss:3.5455 train_time:2305797ms step_avg:2119.30ms step:1099/3242 train_loss:3.5770 train_time:2307921ms step_avg:2119.30ms step:1100/3242 train_loss:3.3614 train_time:2310042ms step_avg:2119.30ms step:1101/3242 train_loss:3.4800 train_time:2312161ms step_avg:2119.30ms step:1102/3242 train_loss:3.6338 train_time:2314281ms step_avg:2119.31ms step:1103/3242 train_loss:3.6006 train_time:2316395ms step_avg:2119.30ms step:1104/3242 train_loss:3.4288 train_time:2318513ms step_avg:2119.30ms step:1105/3242 train_loss:3.5449 train_time:2320636ms step_avg:2119.30ms step:1106/3242 train_loss:3.5065 train_time:2322753ms step_avg:2119.30ms step:1107/3242 train_loss:3.7078 train_time:2324882ms step_avg:2119.31ms step:1108/3242 train_loss:3.3299 train_time:2326995ms step_avg:2119.30ms step:1109/3242 train_loss:3.6357 train_time:2329112ms step_avg:2119.30ms step:1110/3242 train_loss:3.5643 train_time:2331237ms step_avg:2119.31ms step:1111/3242 train_loss:3.5196 train_time:2333356ms step_avg:2119.31ms step:1112/3242 train_loss:3.5331 train_time:2335474ms step_avg:2119.30ms step:1113/3242 train_loss:3.6045 train_time:2337591ms step_avg:2119.30ms step:1114/3242 train_loss:3.9145 train_time:2339711ms step_avg:2119.30ms step:1115/3242 train_loss:3.5937 train_time:2341828ms step_avg:2119.30ms step:1116/3242 train_loss:3.6061 train_time:2343947ms step_avg:2119.30ms step:1117/3242 train_loss:3.4123 train_time:2346070ms step_avg:2119.30ms step:1118/3242 train_loss:3.5052 train_time:2348187ms step_avg:2119.30ms step:1119/3242 train_loss:3.5586 train_time:2350310ms step_avg:2119.31ms step:1120/3242 train_loss:3.5619 train_time:2352428ms step_avg:2119.30ms step:1121/3242 train_loss:3.5678 train_time:2354541ms step_avg:2119.30ms step:1122/3242 train_loss:3.6390 train_time:2356663ms step_avg:2119.30ms step:1123/3242 train_loss:3.7446 train_time:2358783ms step_avg:2119.30ms step:1124/3242 train_loss:3.5192 train_time:2360901ms step_avg:2119.30ms step:1125/3242 train_loss:3.7237 train_time:2363023ms step_avg:2119.30ms step:1125/3242 val_loss:3.5712 train_time:2363435ms step_avg:2119.67ms step:1126/3242 train_loss:3.4222 train_time:2365155ms step_avg:2119.31ms step:1127/3242 train_loss:3.3171 train_time:2367272ms step_avg:2119.31ms step:1128/3242 train_loss:3.5963 train_time:2369392ms step_avg:2119.31ms step:1129/3242 train_loss:4.5737 train_time:2371513ms step_avg:2119.31ms step:1130/3242 train_loss:3.4964 train_time:2373630ms step_avg:2119.31ms step:1131/3242 train_loss:3.5247 train_time:2375744ms step_avg:2119.31ms step:1132/3242 train_loss:3.8017 train_time:2377865ms step_avg:2119.31ms step:1133/3242 train_loss:3.6554 train_time:2379990ms step_avg:2119.31ms step:1134/3242 train_loss:3.6900 train_time:2382110ms step_avg:2119.31ms step:1135/3242 train_loss:3.8141 train_time:2384228ms step_avg:2119.31ms step:1136/3242 train_loss:3.6174 train_time:2386348ms step_avg:2119.31ms step:1137/3242 train_loss:3.7580 train_time:2388464ms step_avg:2119.31ms step:1138/3242 train_loss:3.7507 train_time:2390586ms step_avg:2119.31ms step:1139/3242 train_loss:3.7239 train_time:2392701ms step_avg:2119.31ms step:1140/3242 train_loss:3.5857 train_time:2394822ms step_avg:2119.31ms step:1141/3242 train_loss:3.6713 train_time:2396940ms step_avg:2119.31ms step:1142/3242 train_loss:3.7021 train_time:2399061ms step_avg:2119.31ms step:1143/3242 train_loss:3.5604 train_time:2401302ms step_avg:2119.42ms step:1144/3242 train_loss:3.8220 train_time:2403423ms step_avg:2119.42ms step:1145/3242 train_loss:2.9723 train_time:2405541ms step_avg:2119.42ms step:1146/3242 train_loss:3.7381 train_time:2407659ms step_avg:2119.42ms step:1147/3242 train_loss:3.4243 train_time:2409776ms step_avg:2119.42ms step:1148/3242 train_loss:3.9124 train_time:2411898ms step_avg:2119.42ms step:1149/3242 train_loss:3.6988 train_time:2414013ms step_avg:2119.41ms step:1150/3242 train_loss:3.7007 train_time:2416137ms step_avg:2119.42ms step:1151/3242 train_loss:3.6303 train_time:2418255ms step_avg:2119.42ms step:1152/3242 train_loss:3.5856 train_time:2420378ms step_avg:2119.42ms step:1153/3242 train_loss:3.6139 train_time:2422494ms step_avg:2119.42ms step:1154/3242 train_loss:4.6753 train_time:2424611ms step_avg:2119.42ms step:1155/3242 train_loss:3.7559 train_time:2426731ms step_avg:2119.42ms step:1156/3242 train_loss:3.1894 train_time:2428850ms step_avg:2119.42ms step:1157/3242 train_loss:3.5840 train_time:2430967ms step_avg:2119.41ms step:1158/3242 train_loss:3.5604 train_time:2433097ms step_avg:2119.42ms step:1159/3242 train_loss:3.7171 train_time:2435213ms step_avg:2119.42ms step:1160/3242 train_loss:3.4358 train_time:2437340ms step_avg:2119.43ms step:1161/3242 train_loss:3.6057 train_time:2439459ms step_avg:2119.43ms step:1162/3242 train_loss:3.6014 train_time:2441576ms step_avg:2119.42ms step:1163/3242 train_loss:3.6059 train_time:2443700ms step_avg:2119.43ms step:1164/3242 train_loss:3.5034 train_time:2445819ms step_avg:2119.43ms step:1165/3242 train_loss:3.6211 train_time:2447931ms step_avg:2119.42ms step:1166/3242 train_loss:3.7171 train_time:2450048ms step_avg:2119.42ms step:1167/3242 train_loss:3.4919 train_time:2452178ms step_avg:2119.43ms step:1168/3242 train_loss:3.4724 train_time:2454292ms step_avg:2119.42ms step:1169/3242 train_loss:3.6938 train_time:2456419ms step_avg:2119.43ms step:1170/3242 train_loss:3.5431 train_time:2458536ms step_avg:2119.43ms step:1171/3242 train_loss:3.5529 train_time:2460650ms step_avg:2119.42ms step:1172/3242 train_loss:3.6893 train_time:2462775ms step_avg:2119.43ms step:1173/3242 train_loss:3.5547 train_time:2464894ms step_avg:2119.43ms step:1174/3242 train_loss:3.4675 train_time:2467008ms step_avg:2119.42ms step:1175/3242 train_loss:3.5378 train_time:2469130ms step_avg:2119.42ms step:1176/3242 train_loss:3.5584 train_time:2471252ms step_avg:2119.43ms step:1177/3242 train_loss:3.3665 train_time:2473368ms step_avg:2119.42ms step:1178/3242 train_loss:4.0401 train_time:2475486ms step_avg:2119.42ms step:1179/3242 train_loss:3.5149 train_time:2477606ms step_avg:2119.42ms step:1180/3242 train_loss:3.4660 train_time:2479728ms step_avg:2119.43ms step:1181/3242 train_loss:3.5419 train_time:2481847ms step_avg:2119.43ms step:1182/3242 train_loss:3.8335 train_time:2483968ms step_avg:2119.43ms step:1183/3242 train_loss:3.4009 train_time:2486087ms step_avg:2119.43ms step:1184/3242 train_loss:3.6214 train_time:2488201ms step_avg:2119.42ms step:1185/3242 train_loss:3.7447 train_time:2490327ms step_avg:2119.43ms step:1186/3242 train_loss:3.8239 train_time:2492448ms step_avg:2119.43ms step:1187/3242 train_loss:3.5110 train_time:2494563ms step_avg:2119.43ms step:1188/3242 train_loss:3.4769 train_time:2496679ms step_avg:2119.42ms step:1189/3242 train_loss:3.5478 train_time:2498803ms step_avg:2119.43ms step:1190/3242 train_loss:3.3761 train_time:2500919ms step_avg:2119.42ms step:1191/3242 train_loss:3.5257 train_time:2503036ms step_avg:2119.42ms step:1192/3242 train_loss:3.6202 train_time:2505159ms step_avg:2119.42ms step:1193/3242 train_loss:3.2422 train_time:2507283ms step_avg:2119.43ms step:1194/3242 train_loss:3.7624 train_time:2509392ms step_avg:2119.42ms step:1195/3242 train_loss:3.4611 train_time:2511513ms step_avg:2119.42ms step:1196/3242 train_loss:3.5674 train_time:2513640ms step_avg:2119.43ms step:1197/3242 train_loss:3.4500 train_time:2515759ms step_avg:2119.43ms step:1198/3242 train_loss:3.7972 train_time:2517879ms step_avg:2119.43ms step:1199/3242 train_loss:3.5678 train_time:2519998ms step_avg:2119.43ms step:1200/3242 train_loss:3.6457 train_time:2522114ms step_avg:2119.42ms step:1201/3242 train_loss:3.8743 train_time:2524230ms step_avg:2119.42ms step:1202/3242 train_loss:3.3979 train_time:2526349ms step_avg:2119.42ms step:1203/3242 train_loss:3.4073 train_time:2528463ms step_avg:2119.42ms step:1204/3242 train_loss:3.6087 train_time:2530584ms step_avg:2119.42ms step:1205/3242 train_loss:3.6465 train_time:2532708ms step_avg:2119.42ms step:1206/3242 train_loss:3.3948 train_time:2534827ms step_avg:2119.42ms step:1207/3242 train_loss:3.4472 train_time:2536953ms step_avg:2119.43ms step:1208/3242 train_loss:3.6627 train_time:2539063ms step_avg:2119.42ms step:1209/3242 train_loss:3.4746 train_time:2541192ms step_avg:2119.43ms step:1210/3242 train_loss:3.4973 train_time:2543313ms step_avg:2119.43ms step:1211/3242 train_loss:3.4847 train_time:2545431ms step_avg:2119.43ms step:1212/3242 train_loss:3.9913 train_time:2547545ms step_avg:2119.42ms step:1213/3242 train_loss:3.4397 train_time:2549668ms step_avg:2119.42ms step:1214/3242 train_loss:3.7198 train_time:2551789ms step_avg:2119.43ms step:1215/3242 train_loss:3.4647 train_time:2553906ms step_avg:2119.42ms step:1216/3242 train_loss:3.6345 train_time:2556022ms step_avg:2119.42ms step:1217/3242 train_loss:3.6817 train_time:2558139ms step_avg:2119.42ms step:1218/3242 train_loss:3.5143 train_time:2560261ms step_avg:2119.42ms step:1219/3242 train_loss:3.7548 train_time:2562383ms step_avg:2119.42ms step:1220/3242 train_loss:3.6407 train_time:2564498ms step_avg:2119.42ms step:1221/3242 train_loss:3.7630 train_time:2566618ms step_avg:2119.42ms step:1222/3242 train_loss:3.5011 train_time:2568738ms step_avg:2119.42ms step:1223/3242 train_loss:3.7654 train_time:2570855ms step_avg:2119.42ms step:1224/3242 train_loss:3.4200 train_time:2572972ms step_avg:2119.42ms step:1225/3242 train_loss:3.8084 train_time:2575098ms step_avg:2119.42ms step:1226/3242 train_loss:3.5445 train_time:2577216ms step_avg:2119.42ms step:1227/3242 train_loss:3.4068 train_time:2579334ms step_avg:2119.42ms step:1228/3242 train_loss:3.4087 train_time:2581454ms step_avg:2119.42ms step:1229/3242 train_loss:3.5784 train_time:2583578ms step_avg:2119.42ms step:1230/3242 train_loss:3.5691 train_time:2585696ms step_avg:2119.42ms step:1231/3242 train_loss:3.5769 train_time:2587813ms step_avg:2119.42ms step:1232/3242 train_loss:3.6936 train_time:2589931ms step_avg:2119.42ms step:1233/3242 train_loss:3.3627 train_time:2592049ms step_avg:2119.42ms step:1234/3242 train_loss:3.3558 train_time:2594173ms step_avg:2119.42ms step:1235/3242 train_loss:3.5975 train_time:2596289ms step_avg:2119.42ms step:1236/3242 train_loss:3.6947 train_time:2598409ms step_avg:2119.42ms step:1237/3242 train_loss:3.4807 train_time:2600534ms step_avg:2119.42ms step:1238/3242 train_loss:3.9756 train_time:2602647ms step_avg:2119.42ms step:1239/3242 train_loss:3.8368 train_time:2604772ms step_avg:2119.42ms step:1240/3242 train_loss:3.4694 train_time:2606886ms step_avg:2119.42ms step:1241/3242 train_loss:3.1258 train_time:2609008ms step_avg:2119.42ms step:1242/3242 train_loss:3.7088 train_time:2611127ms step_avg:2119.42ms step:1243/3242 train_loss:3.5178 train_time:2613247ms step_avg:2119.42ms step:1244/3242 train_loss:3.4433 train_time:2615365ms step_avg:2119.42ms step:1245/3242 train_loss:4.2600 train_time:2617486ms step_avg:2119.42ms step:1246/3242 train_loss:3.4679 train_time:2619609ms step_avg:2119.42ms step:1247/3242 train_loss:3.5349 train_time:2621730ms step_avg:2119.43ms step:1248/3242 train_loss:3.5735 train_time:2623843ms step_avg:2119.42ms step:1249/3242 train_loss:3.5275 train_time:2625963ms step_avg:2119.42ms step:1250/3242 train_loss:3.3922 train_time:2628088ms step_avg:2119.43ms step:1250/3242 val_loss:3.5432 train_time:2628502ms step_avg:2119.76ms step:1251/3242 train_loss:3.4794 train_time:2630218ms step_avg:2119.43ms step:1252/3242 train_loss:3.5771 train_time:2632344ms step_avg:2119.44ms step:1253/3242 train_loss:3.4726 train_time:2634459ms step_avg:2119.44ms step:1254/3242 train_loss:4.3853 train_time:2636582ms step_avg:2119.44ms step:1255/3242 train_loss:3.5874 train_time:2638699ms step_avg:2119.44ms step:1256/3242 train_loss:3.8632 train_time:2640818ms step_avg:2119.44ms step:1257/3242 train_loss:3.4583 train_time:2642935ms step_avg:2119.43ms step:1258/3242 train_loss:3.6661 train_time:2645055ms step_avg:2119.44ms step:1259/3242 train_loss:3.6119 train_time:2647179ms step_avg:2119.44ms step:1260/3242 train_loss:3.4316 train_time:2649294ms step_avg:2119.43ms step:1261/3242 train_loss:3.5735 train_time:2651411ms step_avg:2119.43ms step:1262/3242 train_loss:3.5316 train_time:2653534ms step_avg:2119.44ms step:1263/3242 train_loss:3.4837 train_time:2655653ms step_avg:2119.44ms step:1264/3242 train_loss:3.4914 train_time:2657778ms step_avg:2119.44ms step:1265/3242 train_loss:3.4733 train_time:2659895ms step_avg:2119.44ms step:1266/3242 train_loss:3.4918 train_time:2662015ms step_avg:2119.44ms step:1267/3242 train_loss:3.5117 train_time:2664129ms step_avg:2119.43ms step:1268/3242 train_loss:3.0533 train_time:2666250ms step_avg:2119.44ms step:1269/3242 train_loss:3.7279 train_time:2668368ms step_avg:2119.43ms step:1270/3242 train_loss:3.5275 train_time:2670493ms step_avg:2119.44ms step:1271/3242 train_loss:3.5435 train_time:2672613ms step_avg:2119.44ms step:1272/3242 train_loss:3.3769 train_time:2674727ms step_avg:2119.43ms step:1273/3242 train_loss:3.7320 train_time:2676849ms step_avg:2119.44ms step:1274/3242 train_loss:3.5659 train_time:2678966ms step_avg:2119.44ms step:1275/3242 train_loss:3.7663 train_time:2681084ms step_avg:2119.43ms step:1276/3242 train_loss:3.7146 train_time:2683201ms step_avg:2119.43ms step:1277/3242 train_loss:3.5604 train_time:2685323ms step_avg:2119.43ms step:1278/3242 train_loss:3.5247 train_time:2687448ms step_avg:2119.44ms step:1279/3242 train_loss:3.3830 train_time:2689568ms step_avg:2119.44ms step:1280/3242 train_loss:3.1821 train_time:2691685ms step_avg:2119.44ms step:1281/3242 train_loss:3.3225 train_time:2693804ms step_avg:2119.44ms step:1282/3242 train_loss:3.4769 train_time:2695917ms step_avg:2119.43ms step:1283/3242 train_loss:3.9185 train_time:2698031ms step_avg:2119.43ms step:1284/3242 train_loss:3.5667 train_time:2700158ms step_avg:2119.43ms step:1285/3242 train_loss:3.3996 train_time:2702272ms step_avg:2119.43ms step:1286/3242 train_loss:3.2859 train_time:2704390ms step_avg:2119.43ms step:1287/3242 train_loss:3.5902 train_time:2706515ms step_avg:2119.43ms step:1288/3242 train_loss:3.6008 train_time:2708628ms step_avg:2119.43ms step:1289/3242 train_loss:3.7610 train_time:2710751ms step_avg:2119.43ms step:1290/3242 train_loss:3.5434 train_time:2712868ms step_avg:2119.43ms step:1291/3242 train_loss:3.6800 train_time:2714991ms step_avg:2119.43ms step:1292/3242 train_loss:3.5810 train_time:2717110ms step_avg:2119.43ms step:1293/3242 train_loss:3.4305 train_time:2719235ms step_avg:2119.44ms step:1294/3242 train_loss:3.3521 train_time:2721352ms step_avg:2119.43ms step:1295/3242 train_loss:3.5375 train_time:2723478ms step_avg:2119.44ms step:1296/3242 train_loss:3.6263 train_time:2725614ms step_avg:2119.45ms step:1297/3242 train_loss:3.7753 train_time:2727727ms step_avg:2119.45ms step:1298/3242 train_loss:3.6501 train_time:2729848ms step_avg:2119.45ms step:1299/3242 train_loss:3.5443 train_time:2731965ms step_avg:2119.45ms step:1300/3242 train_loss:3.3997 train_time:2734086ms step_avg:2119.45ms step:1301/3242 train_loss:3.9826 train_time:2736204ms step_avg:2119.45ms step:1302/3242 train_loss:3.5735 train_time:2738319ms step_avg:2119.44ms step:1303/3242 train_loss:3.7783 train_time:2740443ms step_avg:2119.45ms step:1304/3242 train_loss:3.7301 train_time:2742564ms step_avg:2119.45ms step:1305/3242 train_loss:3.4782 train_time:2744679ms step_avg:2119.44ms step:1306/3242 train_loss:3.6159 train_time:2746797ms step_avg:2119.44ms step:1307/3242 train_loss:3.5441 train_time:2748916ms step_avg:2119.44ms step:1308/3242 train_loss:3.5312 train_time:2751034ms step_avg:2119.44ms step:1309/3242 train_loss:3.4385 train_time:2753153ms step_avg:2119.44ms step:1310/3242 train_loss:3.7376 train_time:2755283ms step_avg:2119.45ms step:1311/3242 train_loss:3.5171 train_time:2757401ms step_avg:2119.45ms step:1312/3242 train_loss:3.4898 train_time:2759521ms step_avg:2119.45ms step:1313/3242 train_loss:3.6043 train_time:2761629ms step_avg:2119.44ms step:1314/3242 train_loss:3.8331 train_time:2763758ms step_avg:2119.45ms step:1315/3242 train_loss:3.6389 train_time:2765878ms step_avg:2119.45ms step:1316/3242 train_loss:3.6119 train_time:2767991ms step_avg:2119.44ms step:1317/3242 train_loss:3.5882 train_time:2770110ms step_avg:2119.44ms step:1318/3242 train_loss:3.4280 train_time:2772233ms step_avg:2119.44ms step:1319/3242 train_loss:3.6650 train_time:2774349ms step_avg:2119.44ms step:1320/3242 train_loss:3.5010 train_time:2776465ms step_avg:2119.44ms step:1321/3242 train_loss:3.9148 train_time:2778590ms step_avg:2119.44ms step:1322/3242 train_loss:3.6498 train_time:2780712ms step_avg:2119.45ms step:1323/3242 train_loss:3.5482 train_time:2782830ms step_avg:2119.44ms step:1324/3242 train_loss:3.6031 train_time:2784954ms step_avg:2119.45ms step:1325/3242 train_loss:3.7721 train_time:2787066ms step_avg:2119.44ms step:1326/3242 train_loss:3.5586 train_time:2789186ms step_avg:2119.44ms step:1327/3242 train_loss:3.6579 train_time:2791300ms step_avg:2119.44ms step:1328/3242 train_loss:3.4685 train_time:2793419ms step_avg:2119.44ms step:1329/3242 train_loss:3.4617 train_time:2795544ms step_avg:2119.44ms step:1330/3242 train_loss:3.4622 train_time:2797665ms step_avg:2119.44ms step:1331/3242 train_loss:2.3700 train_time:2799775ms step_avg:2119.44ms step:1332/3242 train_loss:3.8176 train_time:2801896ms step_avg:2119.44ms step:1333/3242 train_loss:3.6154 train_time:2804021ms step_avg:2119.44ms step:1334/3242 train_loss:3.5826 train_time:2806274ms step_avg:2119.54ms step:1335/3242 train_loss:3.4535 train_time:2808396ms step_avg:2119.54ms step:1336/3242 train_loss:3.7235 train_time:2810516ms step_avg:2119.54ms step:1337/3242 train_loss:3.6270 train_time:2812634ms step_avg:2119.54ms step:1338/3242 train_loss:3.4176 train_time:2814753ms step_avg:2119.54ms step:1339/3242 train_loss:3.6073 train_time:2816880ms step_avg:2119.55ms step:1340/3242 train_loss:3.6850 train_time:2818993ms step_avg:2119.54ms step:1341/3242 train_loss:3.7816 train_time:2821119ms step_avg:2119.55ms step:1342/3242 train_loss:3.4146 train_time:2823234ms step_avg:2119.54ms step:1343/3242 train_loss:3.5789 train_time:2825354ms step_avg:2119.55ms step:1344/3242 train_loss:3.7870 train_time:2827479ms step_avg:2119.55ms step:1345/3242 train_loss:3.6296 train_time:2829601ms step_avg:2119.55ms step:1346/3242 train_loss:3.7825 train_time:2831718ms step_avg:2119.55ms step:1347/3242 train_loss:3.6510 train_time:2833829ms step_avg:2119.54ms step:1348/3242 train_loss:3.4786 train_time:2835952ms step_avg:2119.55ms step:1349/3242 train_loss:3.4614 train_time:2838074ms step_avg:2119.55ms step:1350/3242 train_loss:3.4493 train_time:2840187ms step_avg:2119.54ms step:1351/3242 train_loss:3.4514 train_time:2842310ms step_avg:2119.55ms step:1352/3242 train_loss:3.5321 train_time:2844428ms step_avg:2119.54ms step:1353/3242 train_loss:3.6705 train_time:2846555ms step_avg:2119.55ms step:1354/3242 train_loss:3.7107 train_time:2848666ms step_avg:2119.54ms step:1355/3242 train_loss:3.5623 train_time:2850788ms step_avg:2119.55ms step:1356/3242 train_loss:3.5268 train_time:2852906ms step_avg:2119.54ms step:1357/3242 train_loss:3.3522 train_time:2855023ms step_avg:2119.54ms step:1358/3242 train_loss:3.5544 train_time:2857150ms step_avg:2119.55ms step:1359/3242 train_loss:3.5509 train_time:2859265ms step_avg:2119.54ms step:1360/3242 train_loss:3.9544 train_time:2861387ms step_avg:2119.55ms step:1361/3242 train_loss:3.2525 train_time:2863508ms step_avg:2119.55ms step:1362/3242 train_loss:3.6194 train_time:2865619ms step_avg:2119.54ms step:1363/3242 train_loss:3.6623 train_time:2867744ms step_avg:2119.55ms step:1364/3242 train_loss:3.3791 train_time:2869860ms step_avg:2119.54ms step:1365/3242 train_loss:3.5337 train_time:2871985ms step_avg:2119.55ms step:1366/3242 train_loss:3.5298 train_time:2874097ms step_avg:2119.54ms step:1367/3242 train_loss:3.6327 train_time:2876222ms step_avg:2119.54ms step:1368/3242 train_loss:3.5308 train_time:2878339ms step_avg:2119.54ms step:1369/3242 train_loss:3.7765 train_time:2880457ms step_avg:2119.54ms step:1370/3242 train_loss:3.6394 train_time:2882577ms step_avg:2119.54ms step:1371/3242 train_loss:3.6188 train_time:2884700ms step_avg:2119.54ms step:1372/3242 train_loss:3.6601 train_time:2886820ms step_avg:2119.54ms step:1373/3242 train_loss:3.7001 train_time:2888935ms step_avg:2119.54ms step:1374/3242 train_loss:3.5430 train_time:2891055ms step_avg:2119.54ms step:1375/3242 train_loss:3.2875 train_time:2893176ms step_avg:2119.54ms step:1375/3242 val_loss:3.5215 train_time:2893588ms step_avg:2119.84ms step:1376/3242 train_loss:3.5626 train_time:2895307ms step_avg:2119.55ms step:1377/3242 train_loss:3.4475 train_time:2897421ms step_avg:2119.55ms step:1378/3242 train_loss:3.9269 train_time:2899539ms step_avg:2119.55ms step:1379/3242 train_loss:3.7291 train_time:2901668ms step_avg:2119.55ms step:1380/3242 train_loss:3.6049 train_time:2903784ms step_avg:2119.55ms step:1381/3242 train_loss:3.5020 train_time:2905912ms step_avg:2119.56ms step:1382/3242 train_loss:3.4637 train_time:2908034ms step_avg:2119.56ms step:1383/3242 train_loss:3.5742 train_time:2910147ms step_avg:2119.55ms step:1384/3242 train_loss:3.6169 train_time:2912262ms step_avg:2119.55ms step:1385/3242 train_loss:3.5125 train_time:2914391ms step_avg:2119.56ms step:1386/3242 train_loss:3.3839 train_time:2916511ms step_avg:2119.56ms step:1387/3242 train_loss:3.5596 train_time:2918625ms step_avg:2119.55ms step:1388/3242 train_loss:3.5603 train_time:2920748ms step_avg:2119.56ms step:1389/3242 train_loss:3.6231 train_time:2922864ms step_avg:2119.55ms step:1390/3242 train_loss:3.5814 train_time:2924985ms step_avg:2119.55ms step:1391/3242 train_loss:3.6344 train_time:2927106ms step_avg:2119.56ms step:1392/3242 train_loss:3.3785 train_time:2929223ms step_avg:2119.55ms step:1393/3242 train_loss:3.5254 train_time:2931343ms step_avg:2119.55ms step:1394/3242 train_loss:3.5269 train_time:2933466ms step_avg:2119.56ms step:1395/3242 train_loss:3.4253 train_time:2935584ms step_avg:2119.55ms step:1396/3242 train_loss:3.6913 train_time:2937700ms step_avg:2119.55ms step:1397/3242 train_loss:3.4611 train_time:2939822ms step_avg:2119.55ms step:1398/3242 train_loss:3.5714 train_time:2941940ms step_avg:2119.55ms step:1399/3242 train_loss:3.4527 train_time:2944049ms step_avg:2119.55ms step:1400/3242 train_loss:3.3223 train_time:2946174ms step_avg:2119.55ms step:1401/3242 train_loss:3.6172 train_time:2948298ms step_avg:2119.55ms step:1402/3242 train_loss:3.5746 train_time:2950415ms step_avg:2119.55ms step:1403/3242 train_loss:3.4979 train_time:2952530ms step_avg:2119.55ms step:1404/3242 train_loss:3.5084 train_time:2954652ms step_avg:2119.55ms step:1405/3242 train_loss:3.4444 train_time:2956776ms step_avg:2119.55ms step:1406/3242 train_loss:3.5037 train_time:2958890ms step_avg:2119.55ms step:1407/3242 train_loss:3.5753 train_time:2961016ms step_avg:2119.55ms step:1408/3242 train_loss:3.6770 train_time:2963133ms step_avg:2119.55ms step:1409/3242 train_loss:3.5274 train_time:2965251ms step_avg:2119.55ms step:1410/3242 train_loss:3.5074 train_time:2967373ms step_avg:2119.55ms step:1411/3242 train_loss:3.4684 train_time:2969492ms step_avg:2119.55ms step:1412/3242 train_loss:3.5453 train_time:2971609ms step_avg:2119.55ms step:1413/3242 train_loss:3.5224 train_time:2973728ms step_avg:2119.55ms step:1414/3242 train_loss:3.5551 train_time:2975842ms step_avg:2119.55ms step:1415/3242 train_loss:3.5780 train_time:2977962ms step_avg:2119.55ms step:1416/3242 train_loss:3.3814 train_time:2980090ms step_avg:2119.55ms step:1417/3242 train_loss:3.4879 train_time:2982202ms step_avg:2119.55ms step:1418/3242 train_loss:3.3540 train_time:2984320ms step_avg:2119.55ms step:1419/3242 train_loss:3.4014 train_time:2986433ms step_avg:2119.54ms step:1420/3242 train_loss:3.2492 train_time:2988559ms step_avg:2119.55ms step:1421/3242 train_loss:3.3666 train_time:2990685ms step_avg:2119.55ms step:1422/3242 train_loss:3.6282 train_time:2992799ms step_avg:2119.55ms step:1423/3242 train_loss:3.7805 train_time:2994914ms step_avg:2119.54ms step:1424/3242 train_loss:3.4328 train_time:2997033ms step_avg:2119.54ms step:1425/3242 train_loss:3.7665 train_time:2999160ms step_avg:2119.55ms step:1426/3242 train_loss:3.5677 train_time:3001283ms step_avg:2119.55ms step:1427/3242 train_loss:3.6492 train_time:3003399ms step_avg:2119.55ms step:1428/3242 train_loss:2.9029 train_time:3005522ms step_avg:2119.55ms step:1429/3242 train_loss:3.7073 train_time:3007637ms step_avg:2119.55ms step:1430/3242 train_loss:3.4946 train_time:3009754ms step_avg:2119.54ms step:1431/3242 train_loss:3.4393 train_time:3011878ms step_avg:2119.55ms step:1432/3242 train_loss:3.4545 train_time:3013991ms step_avg:2119.54ms step:1433/3242 train_loss:3.9522 train_time:3016110ms step_avg:2119.54ms step:1434/3242 train_loss:3.5870 train_time:3018248ms step_avg:2119.56ms step:1435/3242 train_loss:3.3660 train_time:3020368ms step_avg:2119.56ms step:1436/3242 train_loss:3.4627 train_time:3022485ms step_avg:2119.55ms step:1437/3242 train_loss:3.4296 train_time:3024603ms step_avg:2119.55ms step:1438/3242 train_loss:3.4148 train_time:3026730ms step_avg:2119.56ms step:1439/3242 train_loss:3.3887 train_time:3028849ms step_avg:2119.56ms step:1440/3242 train_loss:3.5383 train_time:3030969ms step_avg:2119.56ms step:1441/3242 train_loss:3.5501 train_time:3033081ms step_avg:2119.55ms step:1442/3242 train_loss:3.9349 train_time:3035201ms step_avg:2119.55ms step:1443/3242 train_loss:3.4695 train_time:3037321ms step_avg:2119.55ms step:1444/3242 train_loss:3.4914 train_time:3039439ms step_avg:2119.55ms step:1445/3242 train_loss:3.8510 train_time:3041559ms step_avg:2119.55ms step:1446/3242 train_loss:3.5035 train_time:3043670ms step_avg:2119.55ms step:1447/3242 train_loss:3.5781 train_time:3045789ms step_avg:2119.55ms step:1448/3242 train_loss:3.5157 train_time:3047915ms step_avg:2119.55ms step:1449/3242 train_loss:3.2671 train_time:3050033ms step_avg:2119.55ms step:1450/3242 train_loss:3.4581 train_time:3052151ms step_avg:2119.55ms step:1451/3242 train_loss:3.4814 train_time:3054272ms step_avg:2119.55ms step:1452/3242 train_loss:3.6637 train_time:3056391ms step_avg:2119.55ms step:1453/3242 train_loss:3.8111 train_time:3058506ms step_avg:2119.55ms step:1454/3242 train_loss:3.3967 train_time:3060626ms step_avg:2119.55ms step:1455/3242 train_loss:3.8892 train_time:3062743ms step_avg:2119.55ms step:1456/3242 train_loss:3.4635 train_time:3064868ms step_avg:2119.55ms step:1457/3242 train_loss:3.5628 train_time:3066997ms step_avg:2119.56ms step:1458/3242 train_loss:3.6426 train_time:3069112ms step_avg:2119.55ms step:1459/3242 train_loss:3.4334 train_time:3071237ms step_avg:2119.56ms step:1460/3242 train_loss:3.4241 train_time:3073361ms step_avg:2119.56ms step:1461/3242 train_loss:3.4886 train_time:3075478ms step_avg:2119.56ms step:1462/3242 train_loss:3.4892 train_time:3077599ms step_avg:2119.56ms step:1463/3242 train_loss:3.3471 train_time:3079714ms step_avg:2119.56ms step:1464/3242 train_loss:3.4696 train_time:3081830ms step_avg:2119.55ms step:1465/3242 train_loss:3.4813 train_time:3083954ms step_avg:2119.56ms step:1466/3242 train_loss:3.8915 train_time:3086071ms step_avg:2119.55ms step:1467/3242 train_loss:3.4931 train_time:3088186ms step_avg:2119.55ms step:1468/3242 train_loss:3.3641 train_time:3090303ms step_avg:2119.55ms step:1469/3242 train_loss:3.3561 train_time:3092429ms step_avg:2119.55ms step:1470/3242 train_loss:3.7418 train_time:3094543ms step_avg:2119.55ms step:1471/3242 train_loss:3.6811 train_time:3096660ms step_avg:2119.55ms step:1472/3242 train_loss:3.5484 train_time:3098782ms step_avg:2119.55ms step:1473/3242 train_loss:3.6453 train_time:3100907ms step_avg:2119.55ms step:1474/3242 train_loss:3.5341 train_time:3103023ms step_avg:2119.55ms step:1475/3242 train_loss:3.4900 train_time:3105136ms step_avg:2119.55ms step:1476/3242 train_loss:3.6018 train_time:3107260ms step_avg:2119.55ms step:1477/3242 train_loss:3.4960 train_time:3109371ms step_avg:2119.54ms step:1478/3242 train_loss:3.2174 train_time:3111496ms step_avg:2119.55ms step:1479/3242 train_loss:3.4033 train_time:3113609ms step_avg:2119.54ms step:1480/3242 train_loss:3.3916 train_time:3115739ms step_avg:2119.55ms step:1481/3242 train_loss:3.5389 train_time:3117853ms step_avg:2119.55ms step:1482/3242 train_loss:3.5434 train_time:3119980ms step_avg:2119.55ms step:1483/3242 train_loss:3.5967 train_time:3122088ms step_avg:2119.54ms step:1484/3242 train_loss:3.4237 train_time:3124217ms step_avg:2119.55ms step:1485/3242 train_loss:3.6893 train_time:3126332ms step_avg:2119.55ms step:1486/3242 train_loss:3.5042 train_time:3128452ms step_avg:2119.55ms step:1487/3242 train_loss:3.4496 train_time:3130575ms step_avg:2119.55ms step:1488/3242 train_loss:3.4919 train_time:3132698ms step_avg:2119.55ms step:1489/3242 train_loss:3.5386 train_time:3134819ms step_avg:2119.55ms step:1490/3242 train_loss:3.4732 train_time:3136940ms step_avg:2119.55ms step:1491/3242 train_loss:3.3485 train_time:3139060ms step_avg:2119.55ms step:1492/3242 train_loss:3.6244 train_time:3141179ms step_avg:2119.55ms step:1493/3242 train_loss:3.3401 train_time:3143294ms step_avg:2119.55ms step:1494/3242 train_loss:3.3554 train_time:3145417ms step_avg:2119.55ms step:1495/3242 train_loss:3.6425 train_time:3147531ms step_avg:2119.55ms step:1496/3242 train_loss:4.0349 train_time:3149650ms step_avg:2119.55ms step:1497/3242 train_loss:3.5199 train_time:3151766ms step_avg:2119.55ms step:1498/3242 train_loss:3.4453 train_time:3153887ms step_avg:2119.55ms step:1499/3242 train_loss:3.4443 train_time:3156002ms step_avg:2119.54ms step:1500/3242 train_loss:3.4518 train_time:3158117ms step_avg:2119.54ms step:1500/3242 val_loss:3.5013 train_time:3158528ms step_avg:2119.82ms step:1501/3242 train_loss:3.6804 train_time:3160245ms step_avg:2119.55ms step:1502/3242 train_loss:3.4867 train_time:3162364ms step_avg:2119.55ms step:1503/3242 train_loss:3.3369 train_time:3164482ms step_avg:2119.55ms step:1504/3242 train_loss:3.3022 train_time:3166594ms step_avg:2119.54ms step:1505/3242 train_loss:3.5241 train_time:3168717ms step_avg:2119.54ms step:1506/3242 train_loss:3.3320 train_time:3170830ms step_avg:2119.54ms step:1507/3242 train_loss:3.4995 train_time:3172958ms step_avg:2119.54ms step:1508/3242 train_loss:3.3463 train_time:3175079ms step_avg:2119.55ms step:1509/3242 train_loss:3.6294 train_time:3177204ms step_avg:2119.55ms step:1510/3242 train_loss:3.6358 train_time:3179323ms step_avg:2119.55ms step:1511/3242 train_loss:4.5220 train_time:3181436ms step_avg:2119.54ms step:1512/3242 train_loss:3.5114 train_time:3183562ms step_avg:2119.55ms step:1513/3242 train_loss:3.5323 train_time:3185671ms step_avg:2119.54ms step:1514/3242 train_loss:3.6637 train_time:3187787ms step_avg:2119.54ms step:1515/3242 train_loss:4.0173 train_time:3189902ms step_avg:2119.54ms step:1516/3242 train_loss:3.4895 train_time:3192025ms step_avg:2119.54ms step:1517/3242 train_loss:3.3780 train_time:3194140ms step_avg:2119.54ms step:1518/3242 train_loss:3.4790 train_time:3196263ms step_avg:2119.54ms step:1519/3242 train_loss:3.4447 train_time:3198389ms step_avg:2119.54ms step:1520/3242 train_loss:3.5168 train_time:3200507ms step_avg:2119.54ms step:1521/3242 train_loss:3.4681 train_time:3202630ms step_avg:2119.54ms step:1522/3242 train_loss:3.3726 train_time:3204743ms step_avg:2119.54ms step:1523/3242 train_loss:3.4782 train_time:3206860ms step_avg:2119.54ms step:1524/3242 train_loss:3.5544 train_time:3209109ms step_avg:2119.62ms step:1525/3242 train_loss:3.6310 train_time:3211226ms step_avg:2119.62ms step:1526/3242 train_loss:3.5149 train_time:3213349ms step_avg:2119.62ms step:1527/3242 train_loss:3.1567 train_time:3215466ms step_avg:2119.62ms step:1528/3242 train_loss:3.5078 train_time:3217584ms step_avg:2119.62ms step:1529/3242 train_loss:3.4400 train_time:3219700ms step_avg:2119.62ms step:1530/3242 train_loss:3.5291 train_time:3221814ms step_avg:2119.61ms step:1531/3242 train_loss:3.5079 train_time:3223937ms step_avg:2119.62ms step:1532/3242 train_loss:3.6411 train_time:3226052ms step_avg:2119.61ms step:1533/3242 train_loss:3.4735 train_time:3228168ms step_avg:2119.61ms step:1534/3242 train_loss:3.7479 train_time:3230291ms step_avg:2119.61ms step:1535/3242 train_loss:3.4634 train_time:3232412ms step_avg:2119.61ms step:1536/3242 train_loss:3.5069 train_time:3234524ms step_avg:2119.61ms step:1537/3242 train_loss:3.6776 train_time:3236649ms step_avg:2119.61ms step:1538/3242 train_loss:3.3031 train_time:3238770ms step_avg:2119.61ms step:1539/3242 train_loss:3.2183 train_time:3240884ms step_avg:2119.61ms step:1540/3242 train_loss:3.0784 train_time:3243003ms step_avg:2119.61ms step:1541/3242 train_loss:3.3254 train_time:3245125ms step_avg:2119.61ms step:1542/3242 train_loss:3.5357 train_time:3247240ms step_avg:2119.61ms step:1543/3242 train_loss:3.4749 train_time:3249357ms step_avg:2119.61ms step:1544/3242 train_loss:3.5548 train_time:3251474ms step_avg:2119.61ms step:1545/3242 train_loss:3.0271 train_time:3253599ms step_avg:2119.61ms step:1546/3242 train_loss:3.4027 train_time:3255717ms step_avg:2119.61ms step:1547/3242 train_loss:3.4852 train_time:3257831ms step_avg:2119.60ms step:1548/3242 train_loss:3.4501 train_time:3259943ms step_avg:2119.60ms step:1549/3242 train_loss:3.6736 train_time:3262075ms step_avg:2119.61ms step:1550/3242 train_loss:3.4855 train_time:3264190ms step_avg:2119.60ms step:1551/3242 train_loss:3.4167 train_time:3266316ms step_avg:2119.61ms step:1552/3242 train_loss:3.9644 train_time:3268434ms step_avg:2119.61ms step:1553/3242 train_loss:3.3997 train_time:3270545ms step_avg:2119.60ms step:1554/3242 train_loss:3.5425 train_time:3272664ms step_avg:2119.60ms step:1555/3242 train_loss:3.8517 train_time:3274787ms step_avg:2119.60ms step:1556/3242 train_loss:3.9501 train_time:3276910ms step_avg:2119.61ms step:1557/3242 train_loss:3.5611 train_time:3279034ms step_avg:2119.61ms step:1558/3242 train_loss:3.5700 train_time:3281145ms step_avg:2119.60ms step:1559/3242 train_loss:3.6422 train_time:3283264ms step_avg:2119.60ms step:1560/3242 train_loss:3.4031 train_time:3285386ms step_avg:2119.60ms step:1561/3242 train_loss:3.4448 train_time:3287499ms step_avg:2119.60ms step:1562/3242 train_loss:3.4091 train_time:3289618ms step_avg:2119.60ms step:1563/3242 train_loss:3.4406 train_time:3291741ms step_avg:2119.60ms step:1564/3242 train_loss:3.3437 train_time:3293861ms step_avg:2119.60ms step:1565/3242 train_loss:3.6543 train_time:3295976ms step_avg:2119.60ms step:1566/3242 train_loss:3.4045 train_time:3298100ms step_avg:2119.60ms step:1567/3242 train_loss:3.6421 train_time:3300226ms step_avg:2119.61ms step:1568/3242 train_loss:3.2928 train_time:3302340ms step_avg:2119.60ms step:1569/3242 train_loss:3.4262 train_time:3304460ms step_avg:2119.60ms step:1570/3242 train_loss:3.6252 train_time:3306577ms step_avg:2119.60ms step:1571/3242 train_loss:3.2900 train_time:3308710ms step_avg:2119.61ms step:1572/3242 train_loss:3.4790 train_time:3310843ms step_avg:2119.62ms step:1573/3242 train_loss:3.5151 train_time:3312961ms step_avg:2119.62ms step:1574/3242 train_loss:3.1992 train_time:3315073ms step_avg:2119.61ms step:1575/3242 train_loss:3.3729 train_time:3317198ms step_avg:2119.62ms step:1576/3242 train_loss:3.2821 train_time:3319321ms step_avg:2119.62ms step:1577/3242 train_loss:3.5039 train_time:3321430ms step_avg:2119.61ms step:1578/3242 train_loss:3.3437 train_time:3323554ms step_avg:2119.61ms step:1579/3242 train_loss:3.3757 train_time:3325678ms step_avg:2119.62ms step:1580/3242 train_loss:3.5316 train_time:3327794ms step_avg:2119.61ms step:1581/3242 train_loss:3.6728 train_time:3329917ms step_avg:2119.62ms step:1582/3242 train_loss:3.4517 train_time:3332031ms step_avg:2119.61ms step:1583/3242 train_loss:3.1316 train_time:3334151ms step_avg:2119.61ms step:1584/3242 train_loss:3.2460 train_time:3336274ms step_avg:2119.61ms step:1585/3242 train_loss:3.5522 train_time:3338389ms step_avg:2119.61ms step:1586/3242 train_loss:3.4598 train_time:3340500ms step_avg:2119.61ms step:1587/3242 train_loss:3.4384 train_time:3342621ms step_avg:2119.61ms step:1588/3242 train_loss:3.3952 train_time:3344737ms step_avg:2119.60ms step:1589/3242 train_loss:3.4211 train_time:3346852ms step_avg:2119.60ms step:1590/3242 train_loss:3.5748 train_time:3348970ms step_avg:2119.60ms step:1591/3242 train_loss:3.4220 train_time:3351096ms step_avg:2119.61ms step:1592/3242 train_loss:3.2561 train_time:3353208ms step_avg:2119.60ms step:1593/3242 train_loss:3.3668 train_time:3355327ms step_avg:2119.60ms step:1594/3242 train_loss:3.5184 train_time:3357448ms step_avg:2119.60ms step:1595/3242 train_loss:3.3802 train_time:3359573ms step_avg:2119.60ms step:1596/3242 train_loss:3.5062 train_time:3361687ms step_avg:2119.60ms step:1597/3242 train_loss:3.4616 train_time:3363814ms step_avg:2119.61ms step:1598/3242 train_loss:3.3295 train_time:3365929ms step_avg:2119.60ms step:1599/3242 train_loss:3.1897 train_time:3368046ms step_avg:2119.60ms step:1600/3242 train_loss:3.3657 train_time:3370167ms step_avg:2119.60ms step:1601/3242 train_loss:3.3028 train_time:3372285ms step_avg:2119.60ms step:1602/3242 train_loss:3.4883 train_time:3374408ms step_avg:2119.60ms step:1603/3242 train_loss:3.3894 train_time:3376526ms step_avg:2119.60ms step:1604/3242 train_loss:3.4608 train_time:3378638ms step_avg:2119.60ms step:1605/3242 train_loss:3.5879 train_time:3380762ms step_avg:2119.60ms step:1606/3242 train_loss:3.3109 train_time:3382880ms step_avg:2119.60ms step:1607/3242 train_loss:3.4666 train_time:3384996ms step_avg:2119.60ms step:1608/3242 train_loss:3.3024 train_time:3387115ms step_avg:2119.60ms step:1609/3242 train_loss:3.5168 train_time:3389240ms step_avg:2119.60ms step:1610/3242 train_loss:3.5448 train_time:3391359ms step_avg:2119.60ms step:1611/3242 train_loss:3.4959 train_time:3393481ms step_avg:2119.60ms step:1612/3242 train_loss:3.4961 train_time:3395595ms step_avg:2119.60ms step:1613/3242 train_loss:3.2505 train_time:3397720ms step_avg:2119.60ms step:1614/3242 train_loss:3.5770 train_time:3399838ms step_avg:2119.60ms step:1615/3242 train_loss:3.6026 train_time:3401946ms step_avg:2119.59ms step:1616/3242 train_loss:3.6241 train_time:3404067ms step_avg:2119.59ms step:1617/3242 train_loss:2.9955 train_time:3406186ms step_avg:2119.59ms step:1618/3242 train_loss:3.6883 train_time:3408304ms step_avg:2119.59ms step:1619/3242 train_loss:3.5715 train_time:3410422ms step_avg:2119.59ms step:1620/3242 train_loss:3.4652 train_time:3412545ms step_avg:2119.59ms step:1621/3242 train_loss:3.6468 train_time:3414661ms step_avg:2119.59ms step:1622/3242 train_loss:3.4937 train_time:3416775ms step_avg:2119.59ms step:1623/3242 train_loss:3.4295 train_time:3418899ms step_avg:2119.59ms step:1624/3242 train_loss:3.5470 train_time:3421025ms step_avg:2119.59ms step:1625/3242 train_loss:3.1697 train_time:3423143ms step_avg:2119.59ms step:1625/3242 val_loss:3.4852 train_time:3423558ms step_avg:2119.85ms step:1626/3242 train_loss:3.3646 train_time:3425276ms step_avg:2119.60ms step:1627/3242 train_loss:3.3040 train_time:3427387ms step_avg:2119.60ms step:1628/3242 train_loss:3.5323 train_time:3429513ms step_avg:2119.60ms step:1629/3242 train_loss:3.4612 train_time:3431622ms step_avg:2119.59ms step:1630/3242 train_loss:3.4006 train_time:3433742ms step_avg:2119.59ms step:1631/3242 train_loss:3.6162 train_time:3435866ms step_avg:2119.60ms step:1632/3242 train_loss:3.3448 train_time:3437990ms step_avg:2119.60ms step:1633/3242 train_loss:3.5994 train_time:3440102ms step_avg:2119.59ms step:1634/3242 train_loss:3.4897 train_time:3442223ms step_avg:2119.60ms step:1635/3242 train_loss:3.4569 train_time:3444348ms step_avg:2119.60ms step:1636/3242 train_loss:3.4829 train_time:3446462ms step_avg:2119.60ms step:1637/3242 train_loss:3.4521 train_time:3448582ms step_avg:2119.60ms step:1638/3242 train_loss:3.4551 train_time:3450702ms step_avg:2119.60ms step:1639/3242 train_loss:3.2560 train_time:3452818ms step_avg:2119.59ms step:1640/3242 train_loss:3.2797 train_time:3454935ms step_avg:2119.59ms step:1641/3242 train_loss:3.4110 train_time:3457058ms step_avg:2119.59ms step:1642/3242 train_loss:3.3739 train_time:3459171ms step_avg:2119.59ms step:1643/3242 train_loss:3.3711 train_time:3461294ms step_avg:2119.59ms step:1644/3242 train_loss:3.4091 train_time:3463412ms step_avg:2119.59ms step:1645/3242 train_loss:3.3830 train_time:3465537ms step_avg:2119.59ms step:1646/3242 train_loss:4.1591 train_time:3467654ms step_avg:2119.59ms step:1647/3242 train_loss:3.3163 train_time:3469777ms step_avg:2119.59ms step:1648/3242 train_loss:3.6801 train_time:3471898ms step_avg:2119.60ms step:1649/3242 train_loss:3.7059 train_time:3474017ms step_avg:2119.60ms step:1650/3242 train_loss:3.5528 train_time:3476133ms step_avg:2119.59ms step:1651/3242 train_loss:3.4085 train_time:3478254ms step_avg:2119.59ms step:1652/3242 train_loss:3.7575 train_time:3480369ms step_avg:2119.59ms step:1653/3242 train_loss:3.4883 train_time:3482487ms step_avg:2119.59ms step:1654/3242 train_loss:3.3931 train_time:3484607ms step_avg:2119.59ms step:1655/3242 train_loss:3.3726 train_time:3486730ms step_avg:2119.59ms step:1656/3242 train_loss:3.3748 train_time:3488854ms step_avg:2119.60ms step:1657/3242 train_loss:3.3866 train_time:3490973ms step_avg:2119.60ms step:1658/3242 train_loss:3.6482 train_time:3493090ms step_avg:2119.59ms step:1659/3242 train_loss:3.7062 train_time:3495214ms step_avg:2119.60ms step:1660/3242 train_loss:3.4081 train_time:3497322ms step_avg:2119.59ms step:1661/3242 train_loss:3.4343 train_time:3499445ms step_avg:2119.59ms step:1662/3242 train_loss:3.4479 train_time:3501560ms step_avg:2119.59ms step:1663/3242 train_loss:3.6517 train_time:3503672ms step_avg:2119.58ms step:1664/3242 train_loss:3.6184 train_time:3505797ms step_avg:2119.59ms step:1665/3242 train_loss:3.4049 train_time:3507910ms step_avg:2119.58ms step:1666/3242 train_loss:3.6736 train_time:3510027ms step_avg:2119.58ms step:1667/3242 train_loss:3.0325 train_time:3512141ms step_avg:2119.58ms step:1668/3242 train_loss:3.4971 train_time:3514258ms step_avg:2119.58ms step:1669/3242 train_loss:3.3817 train_time:3516380ms step_avg:2119.58ms step:1670/3242 train_loss:3.3933 train_time:3518492ms step_avg:2119.57ms step:1671/3242 train_loss:3.4005 train_time:3520613ms step_avg:2119.57ms step:1672/3242 train_loss:3.3727 train_time:3522737ms step_avg:2119.58ms step:1673/3242 train_loss:3.6101 train_time:3524852ms step_avg:2119.57ms step:1674/3242 train_loss:3.7494 train_time:3526974ms step_avg:2119.58ms step:1675/3242 train_loss:3.5212 train_time:3529098ms step_avg:2119.58ms step:1676/3242 train_loss:3.3404 train_time:3531206ms step_avg:2119.57ms step:1677/3242 train_loss:3.3758 train_time:3533332ms step_avg:2119.58ms step:1678/3242 train_loss:3.4256 train_time:3535449ms step_avg:2119.57ms step:1679/3242 train_loss:3.4601 train_time:3537568ms step_avg:2119.57ms step:1680/3242 train_loss:3.2377 train_time:3539692ms step_avg:2119.58ms step:1681/3242 train_loss:3.5289 train_time:3541804ms step_avg:2119.57ms step:1682/3242 train_loss:3.5686 train_time:3543926ms step_avg:2119.57ms step:1683/3242 train_loss:3.4754 train_time:3546052ms step_avg:2119.58ms step:1684/3242 train_loss:3.4320 train_time:3548169ms step_avg:2119.58ms step:1685/3242 train_loss:3.3995 train_time:3550293ms step_avg:2119.58ms step:1686/3242 train_loss:3.5722 train_time:3552421ms step_avg:2119.58ms step:1687/3242 train_loss:3.4110 train_time:3554541ms step_avg:2119.58ms step:1688/3242 train_loss:3.3468 train_time:3556665ms step_avg:2119.59ms step:1689/3242 train_loss:3.4182 train_time:3558784ms step_avg:2119.59ms step:1690/3242 train_loss:3.4936 train_time:3560904ms step_avg:2119.59ms step:1691/3242 train_loss:3.7457 train_time:3563021ms step_avg:2119.58ms step:1692/3242 train_loss:3.4403 train_time:3565142ms step_avg:2119.58ms step:1693/3242 train_loss:3.4894 train_time:3567256ms step_avg:2119.58ms step:1694/3242 train_loss:3.5847 train_time:3569378ms step_avg:2119.58ms step:1695/3242 train_loss:3.4475 train_time:3571498ms step_avg:2119.58ms step:1696/3242 train_loss:3.6607 train_time:3573619ms step_avg:2119.58ms step:1697/3242 train_loss:3.3804 train_time:3575732ms step_avg:2119.58ms step:1698/3242 train_loss:3.3887 train_time:3577846ms step_avg:2119.58ms step:1699/3242 train_loss:3.4558 train_time:3579963ms step_avg:2119.58ms step:1700/3242 train_loss:3.3918 train_time:3582085ms step_avg:2119.58ms step:1701/3242 train_loss:3.4457 train_time:3584204ms step_avg:2119.58ms step:1702/3242 train_loss:3.3082 train_time:3586319ms step_avg:2119.57ms step:1703/3242 train_loss:3.3784 train_time:3588440ms step_avg:2119.57ms step:1704/3242 train_loss:3.5521 train_time:3590561ms step_avg:2119.58ms step:1705/3242 train_loss:3.4438 train_time:3592678ms step_avg:2119.57ms step:1706/3242 train_loss:3.4812 train_time:3594802ms step_avg:2119.58ms step:1707/3242 train_loss:3.4974 train_time:3596919ms step_avg:2119.58ms step:1708/3242 train_loss:3.5187 train_time:3599037ms step_avg:2119.57ms step:1709/3242 train_loss:3.2631 train_time:3601168ms step_avg:2119.58ms step:1710/3242 train_loss:3.3513 train_time:3603291ms step_avg:2119.58ms step:1711/3242 train_loss:3.4724 train_time:3605415ms step_avg:2119.59ms step:1712/3242 train_loss:3.4850 train_time:3607542ms step_avg:2119.59ms step:1713/3242 train_loss:3.6087 train_time:3609663ms step_avg:2119.59ms step:1714/3242 train_loss:3.5474 train_time:3611772ms step_avg:2119.58ms step:1715/3242 train_loss:3.4615 train_time:3614023ms step_avg:2119.66ms step:1716/3242 train_loss:3.5745 train_time:3616144ms step_avg:2119.66ms step:1717/3242 train_loss:3.7903 train_time:3618256ms step_avg:2119.66ms step:1718/3242 train_loss:3.4114 train_time:3620379ms step_avg:2119.66ms step:1719/3242 train_loss:3.5813 train_time:3622493ms step_avg:2119.66ms step:1720/3242 train_loss:3.2624 train_time:3624615ms step_avg:2119.66ms step:1721/3242 train_loss:3.3338 train_time:3626722ms step_avg:2119.65ms step:1722/3242 train_loss:3.6369 train_time:3628849ms step_avg:2119.65ms step:1723/3242 train_loss:3.4300 train_time:3630961ms step_avg:2119.65ms step:1724/3242 train_loss:3.4251 train_time:3633080ms step_avg:2119.65ms step:1725/3242 train_loss:3.0775 train_time:3635203ms step_avg:2119.65ms step:1726/3242 train_loss:3.5844 train_time:3637321ms step_avg:2119.65ms step:1727/3242 train_loss:3.3202 train_time:3639437ms step_avg:2119.65ms step:1728/3242 train_loss:3.6652 train_time:3641570ms step_avg:2119.66ms step:1729/3242 train_loss:3.5332 train_time:3643687ms step_avg:2119.65ms step:1730/3242 train_loss:3.5986 train_time:3645806ms step_avg:2119.65ms step:1731/3242 train_loss:3.5532 train_time:3647922ms step_avg:2119.65ms step:1732/3242 train_loss:3.3706 train_time:3650042ms step_avg:2119.65ms step:1733/3242 train_loss:3.5594 train_time:3652167ms step_avg:2119.66ms step:1734/3242 train_loss:3.4489 train_time:3654284ms step_avg:2119.65ms step:1735/3242 train_loss:3.6722 train_time:3656407ms step_avg:2119.66ms step:1736/3242 train_loss:3.7529 train_time:3658531ms step_avg:2119.66ms step:1737/3242 train_loss:3.3057 train_time:3660644ms step_avg:2119.66ms step:1738/3242 train_loss:3.5073 train_time:3662767ms step_avg:2119.66ms step:1739/3242 train_loss:3.5432 train_time:3664876ms step_avg:2119.65ms step:1740/3242 train_loss:3.3966 train_time:3666999ms step_avg:2119.65ms step:1741/3242 train_loss:3.5794 train_time:3669121ms step_avg:2119.65ms step:1742/3242 train_loss:3.5331 train_time:3671244ms step_avg:2119.66ms step:1743/3242 train_loss:3.5089 train_time:3673360ms step_avg:2119.65ms step:1744/3242 train_loss:3.7631 train_time:3675471ms step_avg:2119.65ms step:1745/3242 train_loss:3.3410 train_time:3677594ms step_avg:2119.65ms step:1746/3242 train_loss:3.1632 train_time:3679716ms step_avg:2119.65ms step:1747/3242 train_loss:3.0471 train_time:3681838ms step_avg:2119.65ms step:1748/3242 train_loss:3.5164 train_time:3683960ms step_avg:2119.65ms step:1749/3242 train_loss:3.3675 train_time:3686072ms step_avg:2119.65ms step:1750/3242 train_loss:3.4245 train_time:3688189ms step_avg:2119.65ms step:1750/3242 val_loss:3.4656 train_time:3688603ms step_avg:2119.89ms step:1751/3242 train_loss:3.2810 train_time:3690320ms step_avg:2119.66ms step:1752/3242 train_loss:3.6116 train_time:3692433ms step_avg:2119.65ms step:1753/3242 train_loss:3.5514 train_time:3694554ms step_avg:2119.65ms step:1754/3242 train_loss:3.4771 train_time:3696674ms step_avg:2119.65ms step:1755/3242 train_loss:3.4426 train_time:3698790ms step_avg:2119.65ms step:1756/3242 train_loss:3.5058 train_time:3700911ms step_avg:2119.65ms step:1757/3242 train_loss:2.9446 train_time:3703027ms step_avg:2119.65ms step:1758/3242 train_loss:3.1874 train_time:3705143ms step_avg:2119.65ms step:1759/3242 train_loss:3.1550 train_time:3707263ms step_avg:2119.65ms step:1760/3242 train_loss:3.3705 train_time:3709384ms step_avg:2119.65ms step:1761/3242 train_loss:3.4381 train_time:3711510ms step_avg:2119.65ms step:1762/3242 train_loss:3.2731 train_time:3713624ms step_avg:2119.65ms step:1763/3242 train_loss:6.1167 train_time:3715742ms step_avg:2119.65ms step:1764/3242 train_loss:3.4816 train_time:3717860ms step_avg:2119.65ms step:1765/3242 train_loss:3.5889 train_time:3719980ms step_avg:2119.65ms step:1766/3242 train_loss:3.5619 train_time:3722106ms step_avg:2119.65ms step:1767/3242 train_loss:3.4887 train_time:3724235ms step_avg:2119.66ms step:1768/3242 train_loss:3.4001 train_time:3726353ms step_avg:2119.65ms step:1769/3242 train_loss:3.4998 train_time:3728478ms step_avg:2119.66ms step:1770/3242 train_loss:3.3556 train_time:3730600ms step_avg:2119.66ms step:1771/3242 train_loss:3.6527 train_time:3732717ms step_avg:2119.66ms step:1772/3242 train_loss:3.5040 train_time:3734842ms step_avg:2119.66ms step:1773/3242 train_loss:3.5218 train_time:3736959ms step_avg:2119.66ms step:1774/3242 train_loss:4.1731 train_time:3739083ms step_avg:2119.66ms step:1775/3242 train_loss:3.2685 train_time:3741197ms step_avg:2119.66ms step:1776/3242 train_loss:3.3545 train_time:3743316ms step_avg:2119.66ms step:1777/3242 train_loss:3.9354 train_time:3745441ms step_avg:2119.66ms step:1778/3242 train_loss:3.1222 train_time:3747552ms step_avg:2119.66ms step:1779/3242 train_loss:3.5533 train_time:3749678ms step_avg:2119.66ms step:1780/3242 train_loss:3.4605 train_time:3751795ms step_avg:2119.66ms step:1781/3242 train_loss:3.7465 train_time:3753911ms step_avg:2119.66ms step:1782/3242 train_loss:3.6964 train_time:3756032ms step_avg:2119.66ms step:1783/3242 train_loss:3.4373 train_time:3758157ms step_avg:2119.66ms step:1784/3242 train_loss:3.5168 train_time:3760272ms step_avg:2119.66ms step:1785/3242 train_loss:3.6219 train_time:3762398ms step_avg:2119.66ms step:1786/3242 train_loss:3.1661 train_time:3764510ms step_avg:2119.66ms step:1787/3242 train_loss:3.6142 train_time:3766628ms step_avg:2119.66ms step:1788/3242 train_loss:3.4093 train_time:3768745ms step_avg:2119.65ms step:1789/3242 train_loss:3.3824 train_time:3770870ms step_avg:2119.66ms step:1790/3242 train_loss:3.6932 train_time:3772994ms step_avg:2119.66ms step:1791/3242 train_loss:3.3830 train_time:3775106ms step_avg:2119.66ms step:1792/3242 train_loss:3.3513 train_time:3777229ms step_avg:2119.66ms step:1793/3242 train_loss:3.4742 train_time:3779351ms step_avg:2119.66ms step:1794/3242 train_loss:3.5952 train_time:3781465ms step_avg:2119.66ms step:1795/3242 train_loss:3.4591 train_time:3783583ms step_avg:2119.65ms step:1796/3242 train_loss:3.3916 train_time:3785707ms step_avg:2119.66ms step:1797/3242 train_loss:3.4876 train_time:3787818ms step_avg:2119.65ms step:1798/3242 train_loss:3.3321 train_time:3789938ms step_avg:2119.65ms step:1799/3242 train_loss:3.4241 train_time:3792060ms step_avg:2119.65ms step:1800/3242 train_loss:3.3826 train_time:3794178ms step_avg:2119.65ms step:1801/3242 train_loss:3.4544 train_time:3796300ms step_avg:2119.65ms step:1802/3242 train_loss:3.4997 train_time:3798420ms step_avg:2119.65ms step:1803/3242 train_loss:3.4917 train_time:3800532ms step_avg:2119.65ms step:1804/3242 train_loss:3.6285 train_time:3802655ms step_avg:2119.65ms step:1805/3242 train_loss:3.5549 train_time:3804772ms step_avg:2119.65ms step:1806/3242 train_loss:3.2920 train_time:3806894ms step_avg:2119.65ms step:1807/3242 train_loss:3.4073 train_time:3809006ms step_avg:2119.65ms step:1808/3242 train_loss:3.4932 train_time:3811132ms step_avg:2119.65ms step:1809/3242 train_loss:3.3276 train_time:3813244ms step_avg:2119.65ms step:1810/3242 train_loss:3.5066 train_time:3815364ms step_avg:2119.65ms step:1811/3242 train_loss:3.6688 train_time:3817488ms step_avg:2119.65ms step:1812/3242 train_loss:3.4523 train_time:3819607ms step_avg:2119.65ms step:1813/3242 train_loss:3.5890 train_time:3821729ms step_avg:2119.65ms step:1814/3242 train_loss:3.5800 train_time:3823850ms step_avg:2119.65ms step:1815/3242 train_loss:3.2874 train_time:3825970ms step_avg:2119.65ms step:1816/3242 train_loss:3.5164 train_time:3828091ms step_avg:2119.65ms step:1817/3242 train_loss:3.4910 train_time:3830215ms step_avg:2119.65ms step:1818/3242 train_loss:3.5074 train_time:3832332ms step_avg:2119.65ms step:1819/3242 train_loss:3.3899 train_time:3834451ms step_avg:2119.65ms step:1820/3242 train_loss:3.5173 train_time:3836570ms step_avg:2119.65ms step:1821/3242 train_loss:3.7259 train_time:3838689ms step_avg:2119.65ms step:1822/3242 train_loss:3.3673 train_time:3840810ms step_avg:2119.65ms step:1823/3242 train_loss:3.6247 train_time:3842924ms step_avg:2119.65ms step:1824/3242 train_loss:3.4389 train_time:3845047ms step_avg:2119.65ms step:1825/3242 train_loss:2.9390 train_time:3847163ms step_avg:2119.65ms step:1826/3242 train_loss:3.1528 train_time:3849287ms step_avg:2119.65ms step:1827/3242 train_loss:3.4779 train_time:3851401ms step_avg:2119.65ms step:1828/3242 train_loss:3.5335 train_time:3853526ms step_avg:2119.65ms step:1829/3242 train_loss:3.5226 train_time:3855640ms step_avg:2119.65ms step:1830/3242 train_loss:3.5291 train_time:3857761ms step_avg:2119.65ms step:1831/3242 train_loss:3.2741 train_time:3859879ms step_avg:2119.65ms step:1832/3242 train_loss:3.5029 train_time:3861999ms step_avg:2119.65ms step:1833/3242 train_loss:3.5283 train_time:3864113ms step_avg:2119.65ms step:1834/3242 train_loss:3.3687 train_time:3866229ms step_avg:2119.64ms step:1835/3242 train_loss:3.4490 train_time:3868346ms step_avg:2119.64ms step:1836/3242 train_loss:3.4008 train_time:3870463ms step_avg:2119.64ms step:1837/3242 train_loss:4.0004 train_time:3872576ms step_avg:2119.64ms step:1838/3242 train_loss:3.4579 train_time:3874695ms step_avg:2119.64ms step:1839/3242 train_loss:3.6287 train_time:3876817ms step_avg:2119.64ms step:1840/3242 train_loss:3.4779 train_time:3878938ms step_avg:2119.64ms step:1841/3242 train_loss:3.3760 train_time:3881056ms step_avg:2119.64ms step:1842/3242 train_loss:3.3791 train_time:3883170ms step_avg:2119.63ms step:1843/3242 train_loss:3.4051 train_time:3885287ms step_avg:2119.63ms step:1844/3242 train_loss:3.4097 train_time:3887409ms step_avg:2119.63ms step:1845/3242 train_loss:3.4194 train_time:3889532ms step_avg:2119.64ms step:1846/3242 train_loss:3.5037 train_time:3891653ms step_avg:2119.64ms step:1847/3242 train_loss:3.3352 train_time:3893781ms step_avg:2119.64ms step:1848/3242 train_loss:3.4128 train_time:3895900ms step_avg:2119.64ms step:1849/3242 train_loss:3.3783 train_time:3898014ms step_avg:2119.64ms step:1850/3242 train_loss:3.4517 train_time:3900134ms step_avg:2119.64ms step:1851/3242 train_loss:3.3207 train_time:3902252ms step_avg:2119.64ms step:1852/3242 train_loss:3.7128 train_time:3904368ms step_avg:2119.64ms step:1853/3242 train_loss:3.2316 train_time:3906485ms step_avg:2119.63ms step:1854/3242 train_loss:3.4707 train_time:3908604ms step_avg:2119.63ms step:1855/3242 train_loss:3.1455 train_time:3910725ms step_avg:2119.63ms step:1856/3242 train_loss:3.4959 train_time:3912853ms step_avg:2119.64ms step:1857/3242 train_loss:3.6196 train_time:3914969ms step_avg:2119.64ms step:1858/3242 train_loss:3.4065 train_time:3917092ms step_avg:2119.64ms step:1859/3242 train_loss:3.2999 train_time:3919216ms step_avg:2119.64ms step:1860/3242 train_loss:3.4049 train_time:3921336ms step_avg:2119.64ms step:1861/3242 train_loss:3.7319 train_time:3923449ms step_avg:2119.64ms step:1862/3242 train_loss:3.3247 train_time:3925570ms step_avg:2119.64ms step:1863/3242 train_loss:3.4266 train_time:3927688ms step_avg:2119.64ms step:1864/3242 train_loss:3.4233 train_time:3929803ms step_avg:2119.63ms step:1865/3242 train_loss:3.2626 train_time:3931924ms step_avg:2119.64ms step:1866/3242 train_loss:3.3469 train_time:3934036ms step_avg:2119.63ms step:1867/3242 train_loss:3.4315 train_time:3936154ms step_avg:2119.63ms step:1868/3242 train_loss:3.5161 train_time:3938282ms step_avg:2119.63ms step:1869/3242 train_loss:3.1933 train_time:3940393ms step_avg:2119.63ms step:1870/3242 train_loss:3.4713 train_time:3942518ms step_avg:2119.63ms step:1871/3242 train_loss:3.3184 train_time:3944640ms step_avg:2119.63ms step:1872/3242 train_loss:3.5531 train_time:3946761ms step_avg:2119.64ms step:1873/3242 train_loss:3.3211 train_time:3948870ms step_avg:2119.63ms step:1874/3242 train_loss:3.4192 train_time:3950990ms step_avg:2119.63ms step:1875/3242 train_loss:3.5401 train_time:3953114ms step_avg:2119.63ms step:1875/3242 val_loss:3.4521 train_time:3953525ms step_avg:2119.85ms step:1876/3242 train_loss:3.4726 train_time:3955244ms step_avg:2119.64ms step:1877/3242 train_loss:3.2466 train_time:3957359ms step_avg:2119.64ms step:1878/3242 train_loss:3.4871 train_time:3959478ms step_avg:2119.64ms step:1879/3242 train_loss:3.3823 train_time:3961597ms step_avg:2119.63ms step:1880/3242 train_loss:3.5230 train_time:3963721ms step_avg:2119.64ms step:1881/3242 train_loss:3.4974 train_time:3965838ms step_avg:2119.64ms step:1882/3242 train_loss:3.6028 train_time:3967957ms step_avg:2119.64ms step:1883/3242 train_loss:3.4604 train_time:3970073ms step_avg:2119.63ms step:1884/3242 train_loss:3.5829 train_time:3972198ms step_avg:2119.64ms step:1885/3242 train_loss:3.4131 train_time:3974314ms step_avg:2119.63ms step:1886/3242 train_loss:3.6502 train_time:3976429ms step_avg:2119.63ms step:1887/3242 train_loss:3.3920 train_time:3978555ms step_avg:2119.63ms step:1888/3242 train_loss:2.3346 train_time:3980670ms step_avg:2119.63ms step:1889/3242 train_loss:3.4762 train_time:3982795ms step_avg:2119.64ms step:1890/3242 train_loss:3.3490 train_time:3984908ms step_avg:2119.63ms step:1891/3242 train_loss:3.1349 train_time:3987028ms step_avg:2119.63ms step:1892/3242 train_loss:3.6279 train_time:3989150ms step_avg:2119.63ms step:1893/3242 train_loss:3.4479 train_time:3991269ms step_avg:2119.63ms step:1894/3242 train_loss:3.1823 train_time:3993388ms step_avg:2119.63ms step:1895/3242 train_loss:4.0310 train_time:3995504ms step_avg:2119.63ms step:1896/3242 train_loss:3.4529 train_time:3997626ms step_avg:2119.63ms step:1897/3242 train_loss:3.3223 train_time:3999744ms step_avg:2119.63ms step:1898/3242 train_loss:3.5901 train_time:4001862ms step_avg:2119.63ms step:1899/3242 train_loss:3.2482 train_time:4003981ms step_avg:2119.63ms step:1900/3242 train_loss:3.4190 train_time:4006100ms step_avg:2119.63ms step:1901/3242 train_loss:3.4738 train_time:4008221ms step_avg:2119.63ms step:1902/3242 train_loss:3.3099 train_time:4010340ms step_avg:2119.63ms step:1903/3242 train_loss:3.5461 train_time:4012463ms step_avg:2119.63ms step:1904/3242 train_loss:3.5827 train_time:4014585ms step_avg:2119.63ms step:1905/3242 train_loss:3.6248 train_time:4016825ms step_avg:2119.70ms step:1906/3242 train_loss:3.2566 train_time:4018942ms step_avg:2119.70ms step:1907/3242 train_loss:3.3412 train_time:4021060ms step_avg:2119.69ms step:1908/3242 train_loss:3.2465 train_time:4023174ms step_avg:2119.69ms step:1909/3242 train_loss:3.5117 train_time:4025296ms step_avg:2119.69ms step:1910/3242 train_loss:3.3578 train_time:4027415ms step_avg:2119.69ms step:1911/3242 train_loss:3.2808 train_time:4029531ms step_avg:2119.69ms step:1912/3242 train_loss:3.3342 train_time:4031648ms step_avg:2119.69ms step:1913/3242 train_loss:3.7130 train_time:4033770ms step_avg:2119.69ms step:1914/3242 train_loss:3.8699 train_time:4035880ms step_avg:2119.68ms step:1915/3242 train_loss:3.3731 train_time:4038004ms step_avg:2119.69ms step:1916/3242 train_loss:3.6128 train_time:4040124ms step_avg:2119.69ms step:1917/3242 train_loss:3.5619 train_time:4042241ms step_avg:2119.69ms step:1918/3242 train_loss:3.5510 train_time:4044365ms step_avg:2119.69ms step:1919/3242 train_loss:3.3644 train_time:4046473ms step_avg:2119.68ms step:1920/3242 train_loss:3.5403 train_time:4048591ms step_avg:2119.68ms step:1921/3242 train_loss:3.5582 train_time:4050707ms step_avg:2119.68ms step:1922/3242 train_loss:3.4308 train_time:4052826ms step_avg:2119.68ms step:1923/3242 train_loss:3.5327 train_time:4054949ms step_avg:2119.68ms step:1924/3242 train_loss:3.5460 train_time:4057064ms step_avg:2119.68ms step:1925/3242 train_loss:3.3144 train_time:4059184ms step_avg:2119.68ms step:1926/3242 train_loss:3.2485 train_time:4061302ms step_avg:2119.68ms step:1927/3242 train_loss:3.2885 train_time:4063423ms step_avg:2119.68ms step:1928/3242 train_loss:3.4701 train_time:4065530ms step_avg:2119.67ms step:1929/3242 train_loss:3.4073 train_time:4067653ms step_avg:2119.67ms step:1930/3242 train_loss:3.5558 train_time:4069773ms step_avg:2119.67ms step:1931/3242 train_loss:3.6663 train_time:4071899ms step_avg:2119.68ms step:1932/3242 train_loss:3.3590 train_time:4074010ms step_avg:2119.67ms step:1933/3242 train_loss:3.4467 train_time:4076127ms step_avg:2119.67ms step:1934/3242 train_loss:3.4177 train_time:4078250ms step_avg:2119.67ms step:1935/3242 train_loss:3.5014 train_time:4080371ms step_avg:2119.67ms step:1936/3242 train_loss:3.2021 train_time:4082497ms step_avg:2119.68ms step:1937/3242 train_loss:5.4144 train_time:4084613ms step_avg:2119.67ms step:1938/3242 train_loss:3.3840 train_time:4086742ms step_avg:2119.68ms step:1939/3242 train_loss:3.5023 train_time:4088862ms step_avg:2119.68ms step:1940/3242 train_loss:3.5780 train_time:4090978ms step_avg:2119.68ms step:1941/3242 train_loss:3.4517 train_time:4093096ms step_avg:2119.68ms step:1942/3242 train_loss:3.4151 train_time:4095211ms step_avg:2119.67ms step:1943/3242 train_loss:3.6752 train_time:4097330ms step_avg:2119.67ms step:1944/3242 train_loss:3.7859 train_time:4099447ms step_avg:2119.67ms step:1945/3242 train_loss:3.4586 train_time:4101565ms step_avg:2119.67ms step:1946/3242 train_loss:3.4222 train_time:4103686ms step_avg:2119.67ms step:1947/3242 train_loss:3.1182 train_time:4105801ms step_avg:2119.67ms step:1948/3242 train_loss:3.6157 train_time:4107921ms step_avg:2119.67ms step:1949/3242 train_loss:3.2268 train_time:4110041ms step_avg:2119.67ms step:1950/3242 train_loss:3.4568 train_time:4112161ms step_avg:2119.67ms step:1951/3242 train_loss:3.5036 train_time:4114285ms step_avg:2119.67ms step:1952/3242 train_loss:3.3380 train_time:4116404ms step_avg:2119.67ms step:1953/3242 train_loss:3.5792 train_time:4118527ms step_avg:2119.67ms step:1954/3242 train_loss:3.5620 train_time:4120646ms step_avg:2119.67ms step:1955/3242 train_loss:3.3760 train_time:4122763ms step_avg:2119.67ms step:1956/3242 train_loss:3.3805 train_time:4124881ms step_avg:2119.67ms step:1957/3242 train_loss:3.4551 train_time:4127005ms step_avg:2119.67ms step:1958/3242 train_loss:3.3062 train_time:4129120ms step_avg:2119.67ms step:1959/3242 train_loss:3.3807 train_time:4131246ms step_avg:2119.67ms step:1960/3242 train_loss:3.7919 train_time:4133363ms step_avg:2119.67ms step:1961/3242 train_loss:3.4892 train_time:4135484ms step_avg:2119.67ms step:1962/3242 train_loss:3.2911 train_time:4137603ms step_avg:2119.67ms step:1963/3242 train_loss:3.3460 train_time:4139723ms step_avg:2119.67ms step:1964/3242 train_loss:3.6767 train_time:4141836ms step_avg:2119.67ms step:1965/3242 train_loss:3.3250 train_time:4143956ms step_avg:2119.67ms step:1966/3242 train_loss:3.3771 train_time:4146077ms step_avg:2119.67ms step:1967/3242 train_loss:3.3807 train_time:4148201ms step_avg:2119.67ms step:1968/3242 train_loss:3.4235 train_time:4150323ms step_avg:2119.67ms step:1969/3242 train_loss:3.2297 train_time:4152440ms step_avg:2119.67ms step:1970/3242 train_loss:3.5473 train_time:4154562ms step_avg:2119.67ms step:1971/3242 train_loss:3.3849 train_time:4156675ms step_avg:2119.67ms step:1972/3242 train_loss:3.3369 train_time:4158798ms step_avg:2119.67ms step:1973/3242 train_loss:3.3563 train_time:4160911ms step_avg:2119.67ms step:1974/3242 train_loss:3.6445 train_time:4163034ms step_avg:2119.67ms step:1975/3242 train_loss:3.6377 train_time:4165150ms step_avg:2119.67ms step:1976/3242 train_loss:3.6490 train_time:4167265ms step_avg:2119.67ms step:1977/3242 train_loss:3.3283 train_time:4169380ms step_avg:2119.66ms step:1978/3242 train_loss:3.4752 train_time:4171500ms step_avg:2119.66ms step:1979/3242 train_loss:3.4693 train_time:4173631ms step_avg:2119.67ms step:1980/3242 train_loss:3.4799 train_time:4175750ms step_avg:2119.67ms step:1981/3242 train_loss:3.6608 train_time:4177867ms step_avg:2119.67ms step:1982/3242 train_loss:3.5032 train_time:4179988ms step_avg:2119.67ms step:1983/3242 train_loss:3.8318 train_time:4182111ms step_avg:2119.67ms step:1984/3242 train_loss:3.3727 train_time:4184224ms step_avg:2119.67ms step:1985/3242 train_loss:3.4123 train_time:4186353ms step_avg:2119.67ms step:1986/3242 train_loss:3.5410 train_time:4188476ms step_avg:2119.67ms step:1987/3242 train_loss:3.6410 train_time:4190592ms step_avg:2119.67ms step:1988/3242 train_loss:3.3432 train_time:4192709ms step_avg:2119.67ms step:1989/3242 train_loss:3.7295 train_time:4194830ms step_avg:2119.67ms step:1990/3242 train_loss:3.4640 train_time:4196947ms step_avg:2119.67ms step:1991/3242 train_loss:3.5865 train_time:4199060ms step_avg:2119.67ms step:1992/3242 train_loss:3.3834 train_time:4201178ms step_avg:2119.67ms step:1993/3242 train_loss:3.5201 train_time:4203305ms step_avg:2119.67ms step:1994/3242 train_loss:3.3786 train_time:4205424ms step_avg:2119.67ms step:1995/3242 train_loss:3.4570 train_time:4207544ms step_avg:2119.67ms step:1996/3242 train_loss:3.5674 train_time:4209658ms step_avg:2119.67ms step:1997/3242 train_loss:3.3275 train_time:4211779ms step_avg:2119.67ms step:1998/3242 train_loss:3.4396 train_time:4213902ms step_avg:2119.67ms step:1999/3242 train_loss:3.9967 train_time:4216021ms step_avg:2119.67ms step:2000/3242 train_loss:3.4893 train_time:4218137ms step_avg:2119.67ms step:2000/3242 val_loss:3.4385 train_time:4218549ms step_avg:2119.87ms step:2001/3242 train_loss:3.1532 train_time:4220258ms step_avg:2119.67ms step:2002/3242 train_loss:3.4324 train_time:4222381ms step_avg:2119.67ms step:2003/3242 train_loss:3.4081 train_time:4224503ms step_avg:2119.67ms step:2004/3242 train_loss:3.3052 train_time:4226628ms step_avg:2119.67ms step:2005/3242 train_loss:3.5475 train_time:4228747ms step_avg:2119.67ms step:2006/3242 train_loss:3.6199 train_time:4230866ms step_avg:2119.67ms step:2007/3242 train_loss:3.2218 train_time:4232982ms step_avg:2119.67ms step:2008/3242 train_loss:3.5205 train_time:4235104ms step_avg:2119.67ms step:2009/3242 train_loss:3.5072 train_time:4237216ms step_avg:2119.67ms step:2010/3242 train_loss:3.4848 train_time:4239336ms step_avg:2119.67ms step:2011/3242 train_loss:3.5013 train_time:4241449ms step_avg:2119.66ms step:2012/3242 train_loss:3.3200 train_time:4243570ms step_avg:2119.67ms step:2013/3242 train_loss:3.4359 train_time:4245684ms step_avg:2119.66ms step:2014/3242 train_loss:3.2646 train_time:4247804ms step_avg:2119.66ms step:2015/3242 train_loss:3.2800 train_time:4249926ms step_avg:2119.66ms step:2016/3242 train_loss:3.5833 train_time:4252048ms step_avg:2119.67ms step:2017/3242 train_loss:3.5156 train_time:4254170ms step_avg:2119.67ms step:2018/3242 train_loss:3.4493 train_time:4256284ms step_avg:2119.66ms step:2019/3242 train_loss:3.3006 train_time:4258410ms step_avg:2119.67ms step:2020/3242 train_loss:3.3482 train_time:4260524ms step_avg:2119.66ms step:2021/3242 train_loss:3.4782 train_time:4262645ms step_avg:2119.66ms step:2022/3242 train_loss:3.5441 train_time:4264775ms step_avg:2119.67ms step:2023/3242 train_loss:3.2928 train_time:4266886ms step_avg:2119.67ms step:2024/3242 train_loss:3.5609 train_time:4269008ms step_avg:2119.67ms step:2025/3242 train_loss:3.2749 train_time:4271123ms step_avg:2119.66ms step:2026/3242 train_loss:3.2121 train_time:4273246ms step_avg:2119.67ms step:2027/3242 train_loss:3.4931 train_time:4275361ms step_avg:2119.66ms step:2028/3242 train_loss:3.6786 train_time:4277476ms step_avg:2119.66ms step:2029/3242 train_loss:3.5614 train_time:4279600ms step_avg:2119.66ms step:2030/3242 train_loss:3.4052 train_time:4281717ms step_avg:2119.66ms step:2031/3242 train_loss:3.4764 train_time:4283836ms step_avg:2119.66ms step:2032/3242 train_loss:3.3435 train_time:4285961ms step_avg:2119.66ms step:2033/3242 train_loss:3.3480 train_time:4288070ms step_avg:2119.66ms step:2034/3242 train_loss:3.5706 train_time:4290193ms step_avg:2119.66ms step:2035/3242 train_loss:3.5399 train_time:4292313ms step_avg:2119.66ms step:2036/3242 train_loss:3.4833 train_time:4294435ms step_avg:2119.66ms step:2037/3242 train_loss:3.4814 train_time:4296551ms step_avg:2119.66ms step:2038/3242 train_loss:3.5270 train_time:4298671ms step_avg:2119.66ms step:2039/3242 train_loss:3.5383 train_time:4300790ms step_avg:2119.66ms step:2040/3242 train_loss:3.3856 train_time:4302909ms step_avg:2119.66ms step:2041/3242 train_loss:3.5329 train_time:4305026ms step_avg:2119.66ms step:2042/3242 train_loss:3.3195 train_time:4307138ms step_avg:2119.65ms step:2043/3242 train_loss:3.2809 train_time:4309263ms step_avg:2119.66ms step:2044/3242 train_loss:3.2600 train_time:4311379ms step_avg:2119.66ms step:2045/3242 train_loss:3.4841 train_time:4313500ms step_avg:2119.66ms step:2046/3242 train_loss:3.3659 train_time:4315627ms step_avg:2119.66ms step:2047/3242 train_loss:3.1393 train_time:4317746ms step_avg:2119.66ms step:2048/3242 train_loss:3.2316 train_time:4319862ms step_avg:2119.66ms step:2049/3242 train_loss:3.2950 train_time:4321984ms step_avg:2119.66ms step:2050/3242 train_loss:3.5870 train_time:4324103ms step_avg:2119.66ms step:2051/3242 train_loss:3.3742 train_time:4326227ms step_avg:2119.66ms step:2052/3242 train_loss:3.6409 train_time:4328340ms step_avg:2119.66ms step:2053/3242 train_loss:3.3845 train_time:4330465ms step_avg:2119.66ms step:2054/3242 train_loss:3.4850 train_time:4332577ms step_avg:2119.66ms step:2055/3242 train_loss:3.4082 train_time:4334705ms step_avg:2119.66ms step:2056/3242 train_loss:3.3537 train_time:4336818ms step_avg:2119.66ms step:2057/3242 train_loss:3.4750 train_time:4338940ms step_avg:2119.66ms step:2058/3242 train_loss:3.4154 train_time:4341054ms step_avg:2119.66ms step:2059/3242 train_loss:3.2999 train_time:4343175ms step_avg:2119.66ms step:2060/3242 train_loss:3.4826 train_time:4345300ms step_avg:2119.66ms step:2061/3242 train_loss:3.2760 train_time:4347425ms step_avg:2119.66ms step:2062/3242 train_loss:3.2562 train_time:4349538ms step_avg:2119.66ms step:2063/3242 train_loss:3.9054 train_time:4351661ms step_avg:2119.66ms step:2064/3242 train_loss:3.5779 train_time:4353779ms step_avg:2119.66ms step:2065/3242 train_loss:3.6983 train_time:4355895ms step_avg:2119.66ms step:2066/3242 train_loss:3.4297 train_time:4358011ms step_avg:2119.66ms step:2067/3242 train_loss:3.6949 train_time:4360135ms step_avg:2119.66ms step:2068/3242 train_loss:3.3882 train_time:4362249ms step_avg:2119.65ms step:2069/3242 train_loss:3.3814 train_time:4364372ms step_avg:2119.66ms step:2070/3242 train_loss:3.4240 train_time:4366495ms step_avg:2119.66ms step:2071/3242 train_loss:3.4263 train_time:4368605ms step_avg:2119.65ms step:2072/3242 train_loss:3.6681 train_time:4370728ms step_avg:2119.65ms step:2073/3242 train_loss:3.7212 train_time:4372851ms step_avg:2119.66ms step:2074/3242 train_loss:3.3360 train_time:4374971ms step_avg:2119.66ms step:2075/3242 train_loss:3.4243 train_time:4377089ms step_avg:2119.66ms step:2076/3242 train_loss:3.7035 train_time:4379205ms step_avg:2119.65ms step:2077/3242 train_loss:3.4120 train_time:4381325ms step_avg:2119.65ms step:2078/3242 train_loss:3.4541 train_time:4383441ms step_avg:2119.65ms step:2079/3242 train_loss:3.6448 train_time:4385563ms step_avg:2119.65ms step:2080/3242 train_loss:3.5007 train_time:4387681ms step_avg:2119.65ms step:2081/3242 train_loss:3.6250 train_time:4389807ms step_avg:2119.66ms step:2082/3242 train_loss:3.6782 train_time:4391922ms step_avg:2119.65ms step:2083/3242 train_loss:3.3187 train_time:4394045ms step_avg:2119.65ms step:2084/3242 train_loss:3.3041 train_time:4396161ms step_avg:2119.65ms step:2085/3242 train_loss:3.5235 train_time:4398278ms step_avg:2119.65ms step:2086/3242 train_loss:3.6531 train_time:4400403ms step_avg:2119.65ms step:2087/3242 train_loss:3.3277 train_time:4402516ms step_avg:2119.65ms step:2088/3242 train_loss:3.4240 train_time:4404637ms step_avg:2119.65ms step:2089/3242 train_loss:3.1828 train_time:4406763ms step_avg:2119.66ms step:2090/3242 train_loss:3.4369 train_time:4408879ms step_avg:2119.65ms step:2091/3242 train_loss:3.5117 train_time:4411003ms step_avg:2119.66ms step:2092/3242 train_loss:3.1497 train_time:4413122ms step_avg:2119.66ms step:2093/3242 train_loss:3.3540 train_time:4415238ms step_avg:2119.65ms step:2094/3242 train_loss:3.4361 train_time:4417353ms step_avg:2119.65ms step:2095/3242 train_loss:3.4281 train_time:4419473ms step_avg:2119.65ms step:2096/3242 train_loss:3.4113 train_time:4421720ms step_avg:2119.71ms step:2097/3242 train_loss:3.4553 train_time:4423846ms step_avg:2119.72ms step:2098/3242 train_loss:3.2516 train_time:4425965ms step_avg:2119.72ms step:2099/3242 train_loss:2.9094 train_time:4428079ms step_avg:2119.71ms step:2100/3242 train_loss:3.3896 train_time:4430199ms step_avg:2119.71ms step:2101/3242 train_loss:3.5766 train_time:4432321ms step_avg:2119.71ms step:2102/3242 train_loss:3.5331 train_time:4434443ms step_avg:2119.71ms step:2103/3242 train_loss:3.3036 train_time:4436561ms step_avg:2119.71ms step:2104/3242 train_loss:3.3789 train_time:4438683ms step_avg:2119.71ms step:2105/3242 train_loss:3.3771 train_time:4440809ms step_avg:2119.72ms step:2106/3242 train_loss:3.9191 train_time:4442932ms step_avg:2119.72ms step:2107/3242 train_loss:3.2559 train_time:4445049ms step_avg:2119.72ms step:2108/3242 train_loss:3.4521 train_time:4447169ms step_avg:2119.72ms step:2109/3242 train_loss:3.5815 train_time:4449286ms step_avg:2119.72ms step:2110/3242 train_loss:2.8610 train_time:4451407ms step_avg:2119.72ms step:2111/3242 train_loss:3.4325 train_time:4453519ms step_avg:2119.71ms step:2112/3242 train_loss:3.2786 train_time:4455648ms step_avg:2119.72ms step:2113/3242 train_loss:3.5014 train_time:4457768ms step_avg:2119.72ms step:2114/3242 train_loss:3.7126 train_time:4459882ms step_avg:2119.72ms step:2115/3242 train_loss:3.1733 train_time:4462002ms step_avg:2119.72ms step:2116/3242 train_loss:3.7538 train_time:4464126ms step_avg:2119.72ms step:2117/3242 train_loss:3.3018 train_time:4466246ms step_avg:2119.72ms step:2118/3242 train_loss:3.4998 train_time:4468360ms step_avg:2119.72ms step:2119/3242 train_loss:3.3556 train_time:4470482ms step_avg:2119.72ms step:2120/3242 train_loss:3.5152 train_time:4472600ms step_avg:2119.72ms step:2121/3242 train_loss:3.3847 train_time:4474720ms step_avg:2119.72ms step:2122/3242 train_loss:3.5651 train_time:4476839ms step_avg:2119.72ms step:2123/3242 train_loss:3.3645 train_time:4478971ms step_avg:2119.72ms step:2124/3242 train_loss:3.3067 train_time:4481088ms step_avg:2119.72ms step:2125/3242 train_loss:3.3794 train_time:4483196ms step_avg:2119.71ms step:2125/3242 val_loss:3.4285 train_time:4483609ms step_avg:2119.91ms step:2126/3242 train_loss:3.3234 train_time:4485326ms step_avg:2119.72ms step:2127/3242 train_loss:3.6166 train_time:4487442ms step_avg:2119.72ms step:2128/3242 train_loss:3.2157 train_time:4489564ms step_avg:2119.72ms step:2129/3242 train_loss:3.2274 train_time:4491690ms step_avg:2119.72ms step:2130/3242 train_loss:3.4238 train_time:4493805ms step_avg:2119.72ms step:2131/3242 train_loss:3.4892 train_time:4495924ms step_avg:2119.72ms step:2132/3242 train_loss:3.3672 train_time:4498044ms step_avg:2119.72ms step:2133/3242 train_loss:3.5538 train_time:4500161ms step_avg:2119.72ms step:2134/3242 train_loss:3.3285 train_time:4502281ms step_avg:2119.72ms step:2135/3242 train_loss:3.0223 train_time:4504405ms step_avg:2119.72ms step:2136/3242 train_loss:3.4899 train_time:4506520ms step_avg:2119.72ms step:2137/3242 train_loss:3.5242 train_time:4508648ms step_avg:2119.72ms step:2138/3242 train_loss:3.3832 train_time:4510764ms step_avg:2119.72ms step:2139/3242 train_loss:3.4710 train_time:4512884ms step_avg:2119.72ms step:2140/3242 train_loss:3.2653 train_time:4514999ms step_avg:2119.72ms step:2141/3242 train_loss:3.5166 train_time:4517120ms step_avg:2119.72ms step:2142/3242 train_loss:3.4038 train_time:4519239ms step_avg:2119.72ms step:2143/3242 train_loss:3.2545 train_time:4521359ms step_avg:2119.72ms step:2144/3242 train_loss:3.5714 train_time:4523485ms step_avg:2119.72ms step:2145/3242 train_loss:3.8195 train_time:4525599ms step_avg:2119.72ms step:2146/3242 train_loss:3.8426 train_time:4527722ms step_avg:2119.72ms step:2147/3242 train_loss:3.2341 train_time:4529842ms step_avg:2119.72ms step:2148/3242 train_loss:3.1328 train_time:4531954ms step_avg:2119.72ms step:2149/3242 train_loss:3.5412 train_time:4534080ms step_avg:2119.72ms step:2150/3242 train_loss:3.7569 train_time:4536197ms step_avg:2119.72ms step:2151/3242 train_loss:3.3502 train_time:4538322ms step_avg:2119.72ms step:2152/3242 train_loss:3.4050 train_time:4540441ms step_avg:2119.72ms step:2153/3242 train_loss:3.4048 train_time:4542556ms step_avg:2119.72ms step:2154/3242 train_loss:3.2486 train_time:4544685ms step_avg:2119.72ms step:2155/3242 train_loss:3.3939 train_time:4546800ms step_avg:2119.72ms step:2156/3242 train_loss:3.5303 train_time:4548924ms step_avg:2119.72ms step:2157/3242 train_loss:3.3694 train_time:4551044ms step_avg:2119.72ms step:2158/3242 train_loss:3.2622 train_time:4553164ms step_avg:2119.72ms step:2159/3242 train_loss:3.4747 train_time:4555276ms step_avg:2119.72ms step:2160/3242 train_loss:3.3646 train_time:4557399ms step_avg:2119.72ms step:2161/3242 train_loss:3.4212 train_time:4559513ms step_avg:2119.72ms step:2162/3242 train_loss:3.4378 train_time:4561627ms step_avg:2119.72ms step:2163/3242 train_loss:3.4036 train_time:4563754ms step_avg:2119.72ms step:2164/3242 train_loss:3.5034 train_time:4565864ms step_avg:2119.71ms step:2165/3242 train_loss:3.4332 train_time:4567987ms step_avg:2119.72ms step:2166/3242 train_loss:3.4296 train_time:4570112ms step_avg:2119.72ms step:2167/3242 train_loss:3.4135 train_time:4572228ms step_avg:2119.72ms step:2168/3242 train_loss:3.4756 train_time:4574342ms step_avg:2119.71ms step:2169/3242 train_loss:3.3480 train_time:4576465ms step_avg:2119.71ms step:2170/3242 train_loss:3.3437 train_time:4578586ms step_avg:2119.72ms step:2171/3242 train_loss:3.4508 train_time:4580711ms step_avg:2119.72ms step:2172/3242 train_loss:3.3600 train_time:4582834ms step_avg:2119.72ms step:2173/3242 train_loss:3.3111 train_time:4584955ms step_avg:2119.72ms step:2174/3242 train_loss:3.3049 train_time:4587063ms step_avg:2119.71ms step:2175/3242 train_loss:3.5337 train_time:4589183ms step_avg:2119.72ms step:2176/3242 train_loss:3.4129 train_time:4591302ms step_avg:2119.71ms step:2177/3242 train_loss:3.2675 train_time:4593418ms step_avg:2119.71ms step:2178/3242 train_loss:3.5133 train_time:4595541ms step_avg:2119.71ms step:2179/3242 train_loss:3.3463 train_time:4597652ms step_avg:2119.71ms step:2180/3242 train_loss:3.5017 train_time:4599778ms step_avg:2119.71ms step:2181/3242 train_loss:3.3326 train_time:4601888ms step_avg:2119.71ms step:2182/3242 train_loss:3.5120 train_time:4604011ms step_avg:2119.71ms step:2183/3242 train_loss:3.3561 train_time:4606135ms step_avg:2119.71ms step:2184/3242 train_loss:3.3729 train_time:4608252ms step_avg:2119.71ms step:2185/3242 train_loss:3.5301 train_time:4610373ms step_avg:2119.71ms step:2186/3242 train_loss:3.3957 train_time:4612495ms step_avg:2119.71ms step:2187/3242 train_loss:3.5435 train_time:4614611ms step_avg:2119.71ms step:2188/3242 train_loss:3.4216 train_time:4616728ms step_avg:2119.71ms step:2189/3242 train_loss:3.2584 train_time:4618845ms step_avg:2119.71ms step:2190/3242 train_loss:3.3923 train_time:4620959ms step_avg:2119.71ms step:2191/3242 train_loss:3.4915 train_time:4623085ms step_avg:2119.71ms step:2192/3242 train_loss:3.1449 train_time:4625207ms step_avg:2119.71ms step:2193/3242 train_loss:3.3801 train_time:4627319ms step_avg:2119.71ms step:2194/3242 train_loss:3.3844 train_time:4629442ms step_avg:2119.71ms step:2195/3242 train_loss:3.5402 train_time:4631561ms step_avg:2119.71ms step:2196/3242 train_loss:3.4071 train_time:4633678ms step_avg:2119.71ms step:2197/3242 train_loss:3.8490 train_time:4635802ms step_avg:2119.71ms step:2198/3242 train_loss:3.5570 train_time:4637915ms step_avg:2119.71ms step:2199/3242 train_loss:3.4441 train_time:4640039ms step_avg:2119.71ms step:2200/3242 train_loss:3.3602 train_time:4642160ms step_avg:2119.71ms step:2201/3242 train_loss:3.3219 train_time:4644275ms step_avg:2119.71ms step:2202/3242 train_loss:3.5748 train_time:4646399ms step_avg:2119.71ms step:2203/3242 train_loss:3.4034 train_time:4648519ms step_avg:2119.71ms step:2204/3242 train_loss:3.3353 train_time:4650639ms step_avg:2119.71ms step:2205/3242 train_loss:3.4257 train_time:4652758ms step_avg:2119.71ms step:2206/3242 train_loss:3.4518 train_time:4654878ms step_avg:2119.71ms step:2207/3242 train_loss:3.4327 train_time:4656993ms step_avg:2119.71ms step:2208/3242 train_loss:4.0304 train_time:4659112ms step_avg:2119.71ms step:2209/3242 train_loss:3.5702 train_time:4661226ms step_avg:2119.70ms step:2210/3242 train_loss:3.2939 train_time:4663348ms step_avg:2119.70ms step:2211/3242 train_loss:3.2453 train_time:4665476ms step_avg:2119.71ms step:2212/3242 train_loss:3.5237 train_time:4667596ms step_avg:2119.71ms step:2213/3242 train_loss:3.4567 train_time:4669720ms step_avg:2119.71ms step:2214/3242 train_loss:3.4873 train_time:4671840ms step_avg:2119.71ms step:2215/3242 train_loss:3.4123 train_time:4673953ms step_avg:2119.71ms step:2216/3242 train_loss:3.4211 train_time:4676072ms step_avg:2119.71ms step:2217/3242 train_loss:3.4025 train_time:4678196ms step_avg:2119.71ms step:2218/3242 train_loss:3.3341 train_time:4680322ms step_avg:2119.71ms step:2219/3242 train_loss:3.3977 train_time:4682432ms step_avg:2119.71ms step:2220/3242 train_loss:3.7434 train_time:4684559ms step_avg:2119.71ms step:2221/3242 train_loss:3.5533 train_time:4686671ms step_avg:2119.71ms step:2222/3242 train_loss:3.0743 train_time:4688791ms step_avg:2119.71ms step:2223/3242 train_loss:3.4977 train_time:4690911ms step_avg:2119.71ms step:2224/3242 train_loss:3.2364 train_time:4693027ms step_avg:2119.70ms step:2225/3242 train_loss:3.4362 train_time:4695142ms step_avg:2119.70ms step:2226/3242 train_loss:3.5021 train_time:4697265ms step_avg:2119.70ms step:2227/3242 train_loss:4.0086 train_time:4699387ms step_avg:2119.71ms step:2228/3242 train_loss:3.6390 train_time:4701502ms step_avg:2119.70ms step:2229/3242 train_loss:3.5134 train_time:4703629ms step_avg:2119.71ms step:2230/3242 train_loss:3.3818 train_time:4705740ms step_avg:2119.70ms step:2231/3242 train_loss:3.5419 train_time:4707860ms step_avg:2119.70ms step:2232/3242 train_loss:3.5246 train_time:4709980ms step_avg:2119.70ms step:2233/3242 train_loss:3.1693 train_time:4712102ms step_avg:2119.70ms step:2234/3242 train_loss:3.3590 train_time:4714215ms step_avg:2119.70ms step:2235/3242 train_loss:3.2049 train_time:4716338ms step_avg:2119.70ms step:2236/3242 train_loss:3.2812 train_time:4718461ms step_avg:2119.70ms step:2237/3242 train_loss:3.5421 train_time:4720575ms step_avg:2119.70ms step:2238/3242 train_loss:3.7000 train_time:4722699ms step_avg:2119.70ms step:2239/3242 train_loss:3.3911 train_time:4724814ms step_avg:2119.70ms step:2240/3242 train_loss:3.3835 train_time:4726929ms step_avg:2119.70ms step:2241/3242 train_loss:3.2739 train_time:4729049ms step_avg:2119.70ms step:2242/3242 train_loss:3.5350 train_time:4731171ms step_avg:2119.70ms step:2243/3242 train_loss:3.3360 train_time:4733291ms step_avg:2119.70ms step:2244/3242 train_loss:3.1861 train_time:4735406ms step_avg:2119.70ms step:2245/3242 train_loss:3.3425 train_time:4737529ms step_avg:2119.70ms step:2246/3242 train_loss:3.3701 train_time:4739644ms step_avg:2119.70ms step:2247/3242 train_loss:3.6667 train_time:4741763ms step_avg:2119.70ms step:2248/3242 train_loss:3.2696 train_time:4743890ms step_avg:2119.70ms step:2249/3242 train_loss:3.5655 train_time:4746008ms step_avg:2119.70ms step:2250/3242 train_loss:3.2504 train_time:4748121ms step_avg:2119.70ms step:2250/3242 val_loss:3.4172 train_time:4748536ms step_avg:2119.88ms step:2251/3242 train_loss:3.0562 train_time:4750256ms step_avg:2119.70ms step:2252/3242 train_loss:3.1408 train_time:4752384ms step_avg:2119.71ms step:2253/3242 train_loss:3.6432 train_time:4754497ms step_avg:2119.70ms step:2254/3242 train_loss:3.4838 train_time:4756626ms step_avg:2119.71ms step:2255/3242 train_loss:3.2998 train_time:4758736ms step_avg:2119.70ms step:2256/3242 train_loss:3.3492 train_time:4760867ms step_avg:2119.71ms step:2257/3242 train_loss:3.4299 train_time:4762983ms step_avg:2119.71ms step:2258/3242 train_loss:3.4226 train_time:4765111ms step_avg:2119.71ms step:2259/3242 train_loss:3.4073 train_time:4767232ms step_avg:2119.71ms step:2260/3242 train_loss:3.3815 train_time:4769359ms step_avg:2119.72ms step:2261/3242 train_loss:3.5398 train_time:4771478ms step_avg:2119.71ms step:2262/3242 train_loss:3.3874 train_time:4773593ms step_avg:2119.71ms step:2263/3242 train_loss:4.5833 train_time:4775714ms step_avg:2119.71ms step:2264/3242 train_loss:3.3061 train_time:4777833ms step_avg:2119.71ms step:2265/3242 train_loss:3.4366 train_time:4779958ms step_avg:2119.72ms step:2266/3242 train_loss:3.4769 train_time:4782075ms step_avg:2119.71ms step:2267/3242 train_loss:3.4856 train_time:4784191ms step_avg:2119.71ms step:2268/3242 train_loss:3.6302 train_time:4786312ms step_avg:2119.71ms step:2269/3242 train_loss:3.2573 train_time:4788437ms step_avg:2119.72ms step:2270/3242 train_loss:3.8455 train_time:4790556ms step_avg:2119.71ms step:2271/3242 train_loss:3.0306 train_time:4792678ms step_avg:2119.72ms step:2272/3242 train_loss:3.2548 train_time:4794797ms step_avg:2119.72ms step:2273/3242 train_loss:3.3294 train_time:4796914ms step_avg:2119.71ms step:2274/3242 train_loss:3.3092 train_time:4799040ms step_avg:2119.72ms step:2275/3242 train_loss:4.5400 train_time:4801159ms step_avg:2119.72ms step:2276/3242 train_loss:3.4416 train_time:4803287ms step_avg:2119.72ms step:2277/3242 train_loss:3.5309 train_time:4805403ms step_avg:2119.72ms step:2278/3242 train_loss:3.3476 train_time:4807520ms step_avg:2119.72ms step:2279/3242 train_loss:3.4436 train_time:4809640ms step_avg:2119.72ms step:2280/3242 train_loss:3.4166 train_time:4811757ms step_avg:2119.72ms step:2281/3242 train_loss:3.2413 train_time:4813881ms step_avg:2119.72ms step:2282/3242 train_loss:3.2097 train_time:4816000ms step_avg:2119.72ms step:2283/3242 train_loss:3.8226 train_time:4818120ms step_avg:2119.72ms step:2284/3242 train_loss:3.3043 train_time:4820244ms step_avg:2119.72ms step:2285/3242 train_loss:3.4693 train_time:4822356ms step_avg:2119.72ms step:2286/3242 train_loss:3.4783 train_time:4824599ms step_avg:2119.77ms step:2287/3242 train_loss:3.6230 train_time:4826723ms step_avg:2119.77ms step:2288/3242 train_loss:3.3569 train_time:4828840ms step_avg:2119.77ms step:2289/3242 train_loss:3.5808 train_time:4830960ms step_avg:2119.77ms step:2290/3242 train_loss:3.4517 train_time:4833080ms step_avg:2119.77ms step:2291/3242 train_loss:3.5224 train_time:4835200ms step_avg:2119.77ms step:2292/3242 train_loss:3.5014 train_time:4837313ms step_avg:2119.77ms step:2293/3242 train_loss:3.4263 train_time:4839437ms step_avg:2119.77ms step:2294/3242 train_loss:3.3293 train_time:4841549ms step_avg:2119.77ms step:2295/3242 train_loss:3.3187 train_time:4843672ms step_avg:2119.77ms step:2296/3242 train_loss:3.4064 train_time:4845796ms step_avg:2119.77ms step:2297/3242 train_loss:3.5492 train_time:4847912ms step_avg:2119.77ms step:2298/3242 train_loss:3.3327 train_time:4850028ms step_avg:2119.77ms step:2299/3242 train_loss:3.2785 train_time:4852156ms step_avg:2119.77ms step:2300/3242 train_loss:3.4503 train_time:4854272ms step_avg:2119.77ms step:2301/3242 train_loss:3.3708 train_time:4856389ms step_avg:2119.77ms step:2302/3242 train_loss:3.3954 train_time:4858512ms step_avg:2119.77ms step:2303/3242 train_loss:3.3229 train_time:4860628ms step_avg:2119.77ms step:2304/3242 train_loss:3.2663 train_time:4862747ms step_avg:2119.77ms step:2305/3242 train_loss:3.4484 train_time:4864871ms step_avg:2119.77ms step:2306/3242 train_loss:3.3610 train_time:4866990ms step_avg:2119.77ms step:2307/3242 train_loss:3.3608 train_time:4869101ms step_avg:2119.77ms step:2308/3242 train_loss:3.6405 train_time:4871225ms step_avg:2119.77ms step:2309/3242 train_loss:3.4674 train_time:4873344ms step_avg:2119.77ms step:2310/3242 train_loss:3.4371 train_time:4875463ms step_avg:2119.77ms step:2311/3242 train_loss:3.4128 train_time:4877583ms step_avg:2119.77ms step:2312/3242 train_loss:2.7873 train_time:4879699ms step_avg:2119.76ms step:2313/3242 train_loss:3.3890 train_time:4881825ms step_avg:2119.77ms step:2314/3242 train_loss:3.1472 train_time:4883940ms step_avg:2119.77ms step:2315/3242 train_loss:3.4993 train_time:4886063ms step_avg:2119.77ms step:2316/3242 train_loss:3.4546 train_time:4888175ms step_avg:2119.76ms step:2317/3242 train_loss:3.2885 train_time:4890292ms step_avg:2119.76ms step:2318/3242 train_loss:3.2459 train_time:4892417ms step_avg:2119.76ms step:2319/3242 train_loss:3.2675 train_time:4894537ms step_avg:2119.76ms step:2320/3242 train_loss:3.6169 train_time:4896653ms step_avg:2119.76ms step:2321/3242 train_loss:3.3508 train_time:4898781ms step_avg:2119.77ms step:2322/3242 train_loss:3.4323 train_time:4900888ms step_avg:2119.76ms step:2323/3242 train_loss:3.1342 train_time:4903011ms step_avg:2119.76ms step:2324/3242 train_loss:3.2667 train_time:4905132ms step_avg:2119.76ms step:2325/3242 train_loss:3.5175 train_time:4907249ms step_avg:2119.76ms step:2326/3242 train_loss:3.3318 train_time:4909372ms step_avg:2119.76ms step:2327/3242 train_loss:3.3687 train_time:4911480ms step_avg:2119.76ms step:2328/3242 train_loss:3.8659 train_time:4913603ms step_avg:2119.76ms step:2329/3242 train_loss:3.2865 train_time:4915729ms step_avg:2119.76ms step:2330/3242 train_loss:3.4339 train_time:4917845ms step_avg:2119.76ms step:2331/3242 train_loss:3.2274 train_time:4919961ms step_avg:2119.76ms step:2332/3242 train_loss:3.6946 train_time:4922086ms step_avg:2119.76ms step:2333/3242 train_loss:3.4909 train_time:4924210ms step_avg:2119.76ms step:2334/3242 train_loss:3.1472 train_time:4926323ms step_avg:2119.76ms step:2335/3242 train_loss:3.6681 train_time:4928451ms step_avg:2119.76ms step:2336/3242 train_loss:3.4437 train_time:4930568ms step_avg:2119.76ms step:2337/3242 train_loss:3.4113 train_time:4932692ms step_avg:2119.76ms step:2338/3242 train_loss:3.4476 train_time:4934804ms step_avg:2119.76ms step:2339/3242 train_loss:3.3800 train_time:4936923ms step_avg:2119.76ms step:2340/3242 train_loss:3.3514 train_time:4939037ms step_avg:2119.76ms step:2341/3242 train_loss:3.5193 train_time:4941161ms step_avg:2119.76ms step:2342/3242 train_loss:3.3160 train_time:4943287ms step_avg:2119.76ms step:2343/3242 train_loss:3.5235 train_time:4945401ms step_avg:2119.76ms step:2344/3242 train_loss:3.3869 train_time:4947525ms step_avg:2119.76ms step:2345/3242 train_loss:4.1808 train_time:4949645ms step_avg:2119.76ms step:2346/3242 train_loss:3.0793 train_time:4951757ms step_avg:2119.76ms step:2347/3242 train_loss:3.1761 train_time:4953882ms step_avg:2119.76ms step:2348/3242 train_loss:3.0446 train_time:4955998ms step_avg:2119.76ms step:2349/3242 train_loss:3.6688 train_time:4958122ms step_avg:2119.76ms step:2350/3242 train_loss:3.4541 train_time:4960238ms step_avg:2119.76ms step:2351/3242 train_loss:3.3582 train_time:4962354ms step_avg:2119.76ms step:2352/3242 train_loss:3.4661 train_time:4964471ms step_avg:2119.76ms step:2353/3242 train_loss:4.2977 train_time:4966599ms step_avg:2119.76ms step:2354/3242 train_loss:3.4545 train_time:4968715ms step_avg:2119.76ms step:2355/3242 train_loss:3.3739 train_time:4970834ms step_avg:2119.76ms step:2356/3242 train_loss:3.4991 train_time:4972949ms step_avg:2119.76ms step:2357/3242 train_loss:3.4104 train_time:4975068ms step_avg:2119.76ms step:2358/3242 train_loss:3.3739 train_time:4977188ms step_avg:2119.76ms step:2359/3242 train_loss:3.3938 train_time:4979308ms step_avg:2119.76ms step:2360/3242 train_loss:4.1056 train_time:4981435ms step_avg:2119.76ms step:2361/3242 train_loss:3.0631 train_time:4983550ms step_avg:2119.76ms step:2362/3242 train_loss:3.4146 train_time:4985668ms step_avg:2119.76ms step:2363/3242 train_loss:3.7403 train_time:4987784ms step_avg:2119.76ms step:2364/3242 train_loss:3.7548 train_time:4989902ms step_avg:2119.75ms step:2365/3242 train_loss:3.4795 train_time:4992032ms step_avg:2119.76ms step:2366/3242 train_loss:3.4785 train_time:4994151ms step_avg:2119.76ms step:2367/3242 train_loss:3.3278 train_time:4996270ms step_avg:2119.76ms step:2368/3242 train_loss:3.5621 train_time:4998384ms step_avg:2119.76ms step:2369/3242 train_loss:3.3495 train_time:5000507ms step_avg:2119.76ms step:2370/3242 train_loss:3.3570 train_time:5002629ms step_avg:2119.76ms step:2371/3242 train_loss:3.5239 train_time:5004742ms step_avg:2119.76ms step:2372/3242 train_loss:3.4504 train_time:5006863ms step_avg:2119.76ms step:2373/3242 train_loss:3.4935 train_time:5008981ms step_avg:2119.76ms step:2374/3242 train_loss:3.3320 train_time:5011102ms step_avg:2119.76ms step:2375/3242 train_loss:3.4043 train_time:5013222ms step_avg:2119.76ms step:2375/3242 val_loss:3.4045 train_time:5013635ms step_avg:2119.93ms step:2376/3242 train_loss:3.1914 train_time:5015349ms step_avg:2119.76ms step:2377/3242 train_loss:3.4865 train_time:5017470ms step_avg:2119.76ms step:2378/3242 train_loss:3.4272 train_time:5019591ms step_avg:2119.76ms step:2379/3242 train_loss:3.5912 train_time:5021705ms step_avg:2119.76ms step:2380/3242 train_loss:3.2161 train_time:5023829ms step_avg:2119.76ms step:2381/3242 train_loss:3.2650 train_time:5025949ms step_avg:2119.76ms step:2382/3242 train_loss:3.3344 train_time:5028063ms step_avg:2119.76ms step:2383/3242 train_loss:4.6873 train_time:5030171ms step_avg:2119.75ms step:2384/3242 train_loss:3.5648 train_time:5032300ms step_avg:2119.76ms step:2385/3242 train_loss:3.3573 train_time:5034416ms step_avg:2119.75ms step:2386/3242 train_loss:3.4486 train_time:5036536ms step_avg:2119.75ms step:2387/3242 train_loss:3.4071 train_time:5038663ms step_avg:2119.76ms step:2388/3242 train_loss:3.3624 train_time:5040784ms step_avg:2119.76ms step:2389/3242 train_loss:3.3818 train_time:5042898ms step_avg:2119.76ms step:2390/3242 train_loss:3.2012 train_time:5045024ms step_avg:2119.76ms step:2391/3242 train_loss:3.5133 train_time:5047140ms step_avg:2119.76ms step:2392/3242 train_loss:3.2747 train_time:5049260ms step_avg:2119.76ms step:2393/3242 train_loss:3.2574 train_time:5051380ms step_avg:2119.76ms step:2394/3242 train_loss:3.7756 train_time:5053494ms step_avg:2119.75ms step:2395/3242 train_loss:3.4903 train_time:5055628ms step_avg:2119.76ms step:2396/3242 train_loss:3.2628 train_time:5057751ms step_avg:2119.76ms step:2397/3242 train_loss:3.5143 train_time:5059865ms step_avg:2119.76ms step:2398/3242 train_loss:3.5498 train_time:5061975ms step_avg:2119.76ms step:2399/3242 train_loss:3.4697 train_time:5064094ms step_avg:2119.75ms step:2400/3242 train_loss:3.6679 train_time:5066217ms step_avg:2119.76ms step:2401/3242 train_loss:3.1620 train_time:5068341ms step_avg:2119.76ms step:2402/3242 train_loss:3.3792 train_time:5070463ms step_avg:2119.76ms step:2403/3242 train_loss:3.3545 train_time:5072574ms step_avg:2119.76ms step:2404/3242 train_loss:3.3285 train_time:5074692ms step_avg:2119.75ms step:2405/3242 train_loss:3.5478 train_time:5076819ms step_avg:2119.76ms step:2406/3242 train_loss:3.4118 train_time:5078938ms step_avg:2119.76ms step:2407/3242 train_loss:3.3921 train_time:5081054ms step_avg:2119.76ms step:2408/3242 train_loss:3.4524 train_time:5083178ms step_avg:2119.76ms step:2409/3242 train_loss:3.4277 train_time:5085297ms step_avg:2119.76ms step:2410/3242 train_loss:3.3739 train_time:5087416ms step_avg:2119.76ms step:2411/3242 train_loss:3.1950 train_time:5089529ms step_avg:2119.75ms step:2412/3242 train_loss:3.6431 train_time:5091654ms step_avg:2119.76ms step:2413/3242 train_loss:3.4375 train_time:5093772ms step_avg:2119.76ms step:2414/3242 train_loss:3.2429 train_time:5095891ms step_avg:2119.76ms step:2415/3242 train_loss:3.3914 train_time:5098012ms step_avg:2119.76ms step:2416/3242 train_loss:3.2557 train_time:5100124ms step_avg:2119.75ms step:2417/3242 train_loss:3.5868 train_time:5102245ms step_avg:2119.75ms step:2418/3242 train_loss:3.3253 train_time:5104373ms step_avg:2119.76ms step:2419/3242 train_loss:3.2734 train_time:5106493ms step_avg:2119.76ms step:2420/3242 train_loss:3.4293 train_time:5108609ms step_avg:2119.75ms step:2421/3242 train_loss:3.5362 train_time:5110733ms step_avg:2119.76ms step:2422/3242 train_loss:3.6537 train_time:5112853ms step_avg:2119.76ms step:2423/3242 train_loss:3.2931 train_time:5114967ms step_avg:2119.75ms step:2424/3242 train_loss:3.3011 train_time:5117087ms step_avg:2119.75ms step:2425/3242 train_loss:3.4993 train_time:5119202ms step_avg:2119.75ms step:2426/3242 train_loss:3.3880 train_time:5121325ms step_avg:2119.75ms step:2427/3242 train_loss:3.6215 train_time:5123450ms step_avg:2119.76ms step:2428/3242 train_loss:3.2339 train_time:5125563ms step_avg:2119.75ms step:2429/3242 train_loss:3.4268 train_time:5127679ms step_avg:2119.75ms step:2430/3242 train_loss:3.5105 train_time:5129804ms step_avg:2119.75ms step:2431/3242 train_loss:3.2374 train_time:5131922ms step_avg:2119.75ms step:2432/3242 train_loss:3.4313 train_time:5134039ms step_avg:2119.75ms step:2433/3242 train_loss:3.9435 train_time:5136152ms step_avg:2119.75ms step:2434/3242 train_loss:3.3462 train_time:5138278ms step_avg:2119.75ms step:2435/3242 train_loss:3.3818 train_time:5140391ms step_avg:2119.75ms step:2436/3242 train_loss:3.4395 train_time:5142518ms step_avg:2119.75ms step:2437/3242 train_loss:4.3872 train_time:5144637ms step_avg:2119.75ms step:2438/3242 train_loss:3.1463 train_time:5146749ms step_avg:2119.75ms step:2439/3242 train_loss:3.4765 train_time:5148869ms step_avg:2119.75ms step:2440/3242 train_loss:3.5230 train_time:5150991ms step_avg:2119.75ms step:2441/3242 train_loss:3.5364 train_time:5153112ms step_avg:2119.75ms step:2442/3242 train_loss:3.4614 train_time:5155234ms step_avg:2119.75ms step:2443/3242 train_loss:3.4988 train_time:5157345ms step_avg:2119.75ms step:2444/3242 train_loss:3.3440 train_time:5159466ms step_avg:2119.75ms step:2445/3242 train_loss:3.3746 train_time:5161586ms step_avg:2119.75ms step:2446/3242 train_loss:3.4312 train_time:5163710ms step_avg:2119.75ms step:2447/3242 train_loss:3.3496 train_time:5165831ms step_avg:2119.75ms step:2448/3242 train_loss:3.9290 train_time:5167946ms step_avg:2119.75ms step:2449/3242 train_loss:3.4523 train_time:5170068ms step_avg:2119.75ms step:2450/3242 train_loss:3.5750 train_time:5172185ms step_avg:2119.75ms step:2451/3242 train_loss:3.2596 train_time:5174303ms step_avg:2119.75ms step:2452/3242 train_loss:3.2786 train_time:5176423ms step_avg:2119.75ms step:2453/3242 train_loss:3.3594 train_time:5178546ms step_avg:2119.75ms step:2454/3242 train_loss:3.4980 train_time:5180661ms step_avg:2119.75ms step:2455/3242 train_loss:3.5108 train_time:5182786ms step_avg:2119.75ms step:2456/3242 train_loss:3.6082 train_time:5184902ms step_avg:2119.75ms step:2457/3242 train_loss:3.3536 train_time:5187022ms step_avg:2119.75ms step:2458/3242 train_loss:3.2785 train_time:5189141ms step_avg:2119.75ms step:2459/3242 train_loss:3.2985 train_time:5191258ms step_avg:2119.75ms step:2460/3242 train_loss:3.0608 train_time:5193375ms step_avg:2119.74ms step:2461/3242 train_loss:3.4479 train_time:5195499ms step_avg:2119.75ms step:2462/3242 train_loss:3.1169 train_time:5197613ms step_avg:2119.74ms step:2463/3242 train_loss:3.4219 train_time:5199736ms step_avg:2119.75ms step:2464/3242 train_loss:3.8876 train_time:5201864ms step_avg:2119.75ms step:2465/3242 train_loss:3.4156 train_time:5203979ms step_avg:2119.75ms step:2466/3242 train_loss:3.2838 train_time:5206092ms step_avg:2119.74ms step:2467/3242 train_loss:3.2596 train_time:5208218ms step_avg:2119.75ms step:2468/3242 train_loss:3.4595 train_time:5210340ms step_avg:2119.75ms step:2469/3242 train_loss:3.5478 train_time:5212456ms step_avg:2119.75ms step:2470/3242 train_loss:3.3263 train_time:5214577ms step_avg:2119.75ms step:2471/3242 train_loss:3.2568 train_time:5216690ms step_avg:2119.74ms step:2472/3242 train_loss:3.3472 train_time:5218808ms step_avg:2119.74ms step:2473/3242 train_loss:3.2494 train_time:5220934ms step_avg:2119.75ms step:2474/3242 train_loss:3.5385 train_time:5223048ms step_avg:2119.74ms step:2475/3242 train_loss:3.4835 train_time:5225169ms step_avg:2119.74ms step:2476/3242 train_loss:3.5472 train_time:5227286ms step_avg:2119.74ms step:2477/3242 train_loss:3.5031 train_time:5229542ms step_avg:2119.80ms step:2478/3242 train_loss:3.5808 train_time:5231658ms step_avg:2119.80ms step:2479/3242 train_loss:3.5202 train_time:5233774ms step_avg:2119.80ms step:2480/3242 train_loss:3.9725 train_time:5235895ms step_avg:2119.80ms step:2481/3242 train_loss:3.2274 train_time:5238005ms step_avg:2119.79ms step:2482/3242 train_loss:3.5796 train_time:5240120ms step_avg:2119.79ms step:2483/3242 train_loss:3.7744 train_time:5242245ms step_avg:2119.79ms step:2484/3242 train_loss:3.3179 train_time:5244365ms step_avg:2119.79ms step:2485/3242 train_loss:3.1474 train_time:5246480ms step_avg:2119.79ms step:2486/3242 train_loss:3.6100 train_time:5248600ms step_avg:2119.79ms step:2487/3242 train_loss:3.3611 train_time:5250723ms step_avg:2119.79ms step:2488/3242 train_loss:3.3325 train_time:5252835ms step_avg:2119.79ms step:2489/3242 train_loss:3.6969 train_time:5254955ms step_avg:2119.79ms step:2490/3242 train_loss:3.3205 train_time:5257075ms step_avg:2119.79ms step:2491/3242 train_loss:2.6899 train_time:5259200ms step_avg:2119.79ms step:2492/3242 train_loss:3.4621 train_time:5261317ms step_avg:2119.79ms step:2493/3242 train_loss:3.2518 train_time:5263442ms step_avg:2119.79ms step:2494/3242 train_loss:3.2283 train_time:5265559ms step_avg:2119.79ms step:2495/3242 train_loss:3.3699 train_time:5267680ms step_avg:2119.79ms step:2496/3242 train_loss:3.3494 train_time:5269799ms step_avg:2119.79ms step:2497/3242 train_loss:3.9065 train_time:5271918ms step_avg:2119.79ms step:2498/3242 train_loss:3.4392 train_time:5274035ms step_avg:2119.79ms step:2499/3242 train_loss:3.3141 train_time:5276154ms step_avg:2119.79ms step:2500/3242 train_loss:3.4247 train_time:5278274ms step_avg:2119.79ms step:2500/3242 val_loss:3.3818 train_time:5278687ms step_avg:2119.95ms step:2501/3242 train_loss:3.4147 train_time:5280406ms step_avg:2119.79ms step:2502/3242 train_loss:3.3794 train_time:5282532ms step_avg:2119.80ms step:2503/3242 train_loss:3.4002 train_time:5284656ms step_avg:2119.80ms step:2504/3242 train_loss:3.4697 train_time:5286773ms step_avg:2119.80ms step:2505/3242 train_loss:3.6745 train_time:5288894ms step_avg:2119.80ms step:2506/3242 train_loss:3.5886 train_time:5291013ms step_avg:2119.80ms step:2507/3242 train_loss:3.4909 train_time:5293131ms step_avg:2119.80ms step:2508/3242 train_loss:3.5124 train_time:5295247ms step_avg:2119.79ms step:2509/3242 train_loss:3.2094 train_time:5297369ms step_avg:2119.80ms step:2510/3242 train_loss:3.5182 train_time:5299484ms step_avg:2119.79ms step:2511/3242 train_loss:3.3315 train_time:5301610ms step_avg:2119.80ms step:2512/3242 train_loss:3.4609 train_time:5303726ms step_avg:2119.79ms step:2513/3242 train_loss:3.1942 train_time:5305848ms step_avg:2119.80ms step:2514/3242 train_loss:3.5250 train_time:5307968ms step_avg:2119.80ms step:2515/3242 train_loss:3.4938 train_time:5310080ms step_avg:2119.79ms step:2516/3242 train_loss:3.3152 train_time:5312205ms step_avg:2119.79ms step:2517/3242 train_loss:3.6068 train_time:5314323ms step_avg:2119.79ms step:2518/3242 train_loss:3.3901 train_time:5316441ms step_avg:2119.79ms step:2519/3242 train_loss:3.3904 train_time:5318561ms step_avg:2119.79ms step:2520/3242 train_loss:3.3817 train_time:5320677ms step_avg:2119.79ms step:2521/3242 train_loss:3.5348 train_time:5322797ms step_avg:2119.79ms step:2522/3242 train_loss:3.4953 train_time:5324913ms step_avg:2119.79ms step:2523/3242 train_loss:3.2790 train_time:5327033ms step_avg:2119.79ms step:2524/3242 train_loss:3.3976 train_time:5329158ms step_avg:2119.79ms step:2525/3242 train_loss:3.4042 train_time:5331275ms step_avg:2119.79ms step:2526/3242 train_loss:3.5462 train_time:5333395ms step_avg:2119.79ms step:2527/3242 train_loss:3.3490 train_time:5335519ms step_avg:2119.79ms step:2528/3242 train_loss:3.0359 train_time:5337629ms step_avg:2119.79ms step:2529/3242 train_loss:3.4010 train_time:5339747ms step_avg:2119.79ms step:2530/3242 train_loss:3.2735 train_time:5341870ms step_avg:2119.79ms step:2531/3242 train_loss:3.2738 train_time:5343991ms step_avg:2119.79ms step:2532/3242 train_loss:3.3387 train_time:5346112ms step_avg:2119.79ms step:2533/3242 train_loss:3.4198 train_time:5348236ms step_avg:2119.79ms step:2534/3242 train_loss:3.2238 train_time:5350364ms step_avg:2119.80ms step:2535/3242 train_loss:3.4973 train_time:5352482ms step_avg:2119.79ms step:2536/3242 train_loss:3.3140 train_time:5354600ms step_avg:2119.79ms step:2537/3242 train_loss:3.2099 train_time:5356724ms step_avg:2119.80ms step:2538/3242 train_loss:3.4965 train_time:5358847ms step_avg:2119.80ms step:2539/3242 train_loss:3.3607 train_time:5360960ms step_avg:2119.79ms step:2540/3242 train_loss:3.2250 train_time:5363084ms step_avg:2119.80ms step:2541/3242 train_loss:3.4169 train_time:5365200ms step_avg:2119.79ms step:2542/3242 train_loss:3.5862 train_time:5367323ms step_avg:2119.80ms step:2543/3242 train_loss:3.3122 train_time:5369438ms step_avg:2119.79ms step:2544/3242 train_loss:3.6447 train_time:5371562ms step_avg:2119.80ms step:2545/3242 train_loss:3.3307 train_time:5373682ms step_avg:2119.80ms step:2546/3242 train_loss:3.3599 train_time:5375801ms step_avg:2119.80ms step:2547/3242 train_loss:3.7806 train_time:5377915ms step_avg:2119.79ms step:2548/3242 train_loss:3.3342 train_time:5380039ms step_avg:2119.79ms step:2549/3242 train_loss:3.4748 train_time:5382156ms step_avg:2119.79ms step:2550/3242 train_loss:3.3697 train_time:5384276ms step_avg:2119.79ms step:2551/3242 train_loss:3.3612 train_time:5386393ms step_avg:2119.79ms step:2552/3242 train_loss:3.3336 train_time:5388511ms step_avg:2119.79ms step:2553/3242 train_loss:3.2172 train_time:5390633ms step_avg:2119.79ms step:2554/3242 train_loss:3.3136 train_time:5392750ms step_avg:2119.79ms step:2555/3242 train_loss:3.3399 train_time:5394871ms step_avg:2119.79ms step:2556/3242 train_loss:3.2590 train_time:5396990ms step_avg:2119.79ms step:2557/3242 train_loss:3.4409 train_time:5399107ms step_avg:2119.79ms step:2558/3242 train_loss:3.3979 train_time:5401235ms step_avg:2119.79ms step:2559/3242 train_loss:3.2610 train_time:5403349ms step_avg:2119.79ms step:2560/3242 train_loss:3.5770 train_time:5405465ms step_avg:2119.79ms step:2561/3242 train_loss:3.4677 train_time:5407586ms step_avg:2119.79ms step:2562/3242 train_loss:3.5558 train_time:5409705ms step_avg:2119.79ms step:2563/3242 train_loss:3.4757 train_time:5411831ms step_avg:2119.79ms step:2564/3242 train_loss:3.3952 train_time:5413947ms step_avg:2119.79ms step:2565/3242 train_loss:3.4538 train_time:5416068ms step_avg:2119.79ms step:2566/3242 train_loss:3.5066 train_time:5418184ms step_avg:2119.79ms step:2567/3242 train_loss:3.3411 train_time:5420298ms step_avg:2119.79ms step:2568/3242 train_loss:3.3673 train_time:5422424ms step_avg:2119.79ms step:2569/3242 train_loss:3.5033 train_time:5424544ms step_avg:2119.79ms step:2570/3242 train_loss:3.2874 train_time:5426661ms step_avg:2119.79ms step:2571/3242 train_loss:3.3749 train_time:5428784ms step_avg:2119.79ms step:2572/3242 train_loss:3.4139 train_time:5430899ms step_avg:2119.79ms step:2573/3242 train_loss:3.0946 train_time:5433017ms step_avg:2119.79ms step:2574/3242 train_loss:3.3143 train_time:5435144ms step_avg:2119.79ms step:2575/3242 train_loss:3.5833 train_time:5437259ms step_avg:2119.79ms step:2576/3242 train_loss:3.2066 train_time:5439375ms step_avg:2119.79ms step:2577/3242 train_loss:3.1474 train_time:5441495ms step_avg:2119.79ms step:2578/3242 train_loss:3.2921 train_time:5443617ms step_avg:2119.79ms step:2579/3242 train_loss:3.2833 train_time:5445734ms step_avg:2119.79ms step:2580/3242 train_loss:3.4379 train_time:5447857ms step_avg:2119.79ms step:2581/3242 train_loss:2.7539 train_time:5449977ms step_avg:2119.79ms step:2582/3242 train_loss:3.2734 train_time:5452094ms step_avg:2119.79ms step:2583/3242 train_loss:3.3257 train_time:5454212ms step_avg:2119.79ms step:2584/3242 train_loss:3.2431 train_time:5456333ms step_avg:2119.79ms step:2585/3242 train_loss:3.4543 train_time:5458455ms step_avg:2119.79ms step:2586/3242 train_loss:3.2934 train_time:5460574ms step_avg:2119.79ms step:2587/3242 train_loss:3.6321 train_time:5462694ms step_avg:2119.79ms step:2588/3242 train_loss:3.1659 train_time:5464815ms step_avg:2119.79ms step:2589/3242 train_loss:3.5476 train_time:5466933ms step_avg:2119.79ms step:2590/3242 train_loss:3.4523 train_time:5469047ms step_avg:2119.79ms step:2591/3242 train_loss:3.5580 train_time:5471160ms step_avg:2119.78ms step:2592/3242 train_loss:3.5967 train_time:5473276ms step_avg:2119.78ms step:2593/3242 train_loss:3.1862 train_time:5475399ms step_avg:2119.78ms step:2594/3242 train_loss:3.1917 train_time:5477511ms step_avg:2119.78ms step:2595/3242 train_loss:3.4463 train_time:5479623ms step_avg:2119.78ms step:2596/3242 train_loss:3.1924 train_time:5481749ms step_avg:2119.78ms step:2597/3242 train_loss:3.7293 train_time:5483864ms step_avg:2119.78ms step:2598/3242 train_loss:3.4043 train_time:5485978ms step_avg:2119.78ms step:2599/3242 train_loss:3.3721 train_time:5488099ms step_avg:2119.78ms step:2600/3242 train_loss:3.2271 train_time:5490218ms step_avg:2119.78ms step:2601/3242 train_loss:3.3074 train_time:5492345ms step_avg:2119.78ms step:2602/3242 train_loss:3.4662 train_time:5494467ms step_avg:2119.78ms step:2603/3242 train_loss:3.3469 train_time:5496585ms step_avg:2119.78ms step:2604/3242 train_loss:3.3064 train_time:5498702ms step_avg:2119.78ms step:2605/3242 train_loss:3.5953 train_time:5500816ms step_avg:2119.77ms step:2606/3242 train_loss:3.2642 train_time:5502939ms step_avg:2119.78ms step:2607/3242 train_loss:2.7624 train_time:5505057ms step_avg:2119.78ms step:2608/3242 train_loss:3.3234 train_time:5507177ms step_avg:2119.78ms step:2609/3242 train_loss:3.3395 train_time:5509299ms step_avg:2119.78ms step:2610/3242 train_loss:3.5379 train_time:5511419ms step_avg:2119.78ms step:2611/3242 train_loss:3.3394 train_time:5513538ms step_avg:2119.78ms step:2612/3242 train_loss:3.4429 train_time:5515650ms step_avg:2119.77ms step:2613/3242 train_loss:3.3500 train_time:5517764ms step_avg:2119.77ms step:2614/3242 train_loss:3.4103 train_time:5519889ms step_avg:2119.77ms step:2615/3242 train_loss:3.4041 train_time:5522012ms step_avg:2119.77ms step:2616/3242 train_loss:3.4657 train_time:5524127ms step_avg:2119.77ms step:2617/3242 train_loss:3.1694 train_time:5526245ms step_avg:2119.77ms step:2618/3242 train_loss:3.6111 train_time:5528367ms step_avg:2119.77ms step:2619/3242 train_loss:3.5714 train_time:5530479ms step_avg:2119.77ms step:2620/3242 train_loss:3.2513 train_time:5532599ms step_avg:2119.77ms step:2621/3242 train_loss:3.5364 train_time:5534716ms step_avg:2119.77ms step:2622/3242 train_loss:3.3666 train_time:5536831ms step_avg:2119.77ms step:2623/3242 train_loss:3.3293 train_time:5538950ms step_avg:2119.77ms step:2624/3242 train_loss:3.3322 train_time:5541064ms step_avg:2119.76ms step:2625/3242 train_loss:3.5324 train_time:5543185ms step_avg:2119.76ms step:2625/3242 val_loss:3.3591 train_time:5543599ms step_avg:2119.92ms step:2626/3242 train_loss:3.5944 train_time:5545316ms step_avg:2119.77ms step:2627/3242 train_loss:3.7381 train_time:5547433ms step_avg:2119.77ms step:2628/3242 train_loss:3.6102 train_time:5549547ms step_avg:2119.77ms step:2629/3242 train_loss:3.4330 train_time:5551668ms step_avg:2119.77ms step:2630/3242 train_loss:3.4446 train_time:5553784ms step_avg:2119.76ms step:2631/3242 train_loss:3.1253 train_time:5555910ms step_avg:2119.77ms step:2632/3242 train_loss:3.3672 train_time:5558029ms step_avg:2119.77ms step:2633/3242 train_loss:3.1377 train_time:5560149ms step_avg:2119.77ms step:2634/3242 train_loss:3.5488 train_time:5562265ms step_avg:2119.77ms step:2635/3242 train_loss:3.5465 train_time:5564389ms step_avg:2119.77ms step:2636/3242 train_loss:3.3629 train_time:5566504ms step_avg:2119.77ms step:2637/3242 train_loss:3.5346 train_time:5568628ms step_avg:2119.77ms step:2638/3242 train_loss:3.5535 train_time:5570748ms step_avg:2119.77ms step:2639/3242 train_loss:3.3850 train_time:5572866ms step_avg:2119.77ms step:2640/3242 train_loss:3.2145 train_time:5574984ms step_avg:2119.77ms step:2641/3242 train_loss:3.5568 train_time:5577107ms step_avg:2119.77ms step:2642/3242 train_loss:3.0494 train_time:5579227ms step_avg:2119.77ms step:2643/3242 train_loss:3.4571 train_time:5581348ms step_avg:2119.77ms step:2644/3242 train_loss:3.2921 train_time:5583466ms step_avg:2119.77ms step:2645/3242 train_loss:2.9145 train_time:5585582ms step_avg:2119.77ms step:2646/3242 train_loss:3.3237 train_time:5587698ms step_avg:2119.76ms step:2647/3242 train_loss:3.3514 train_time:5589819ms step_avg:2119.76ms step:2648/3242 train_loss:3.1341 train_time:5591944ms step_avg:2119.77ms step:2649/3242 train_loss:3.2728 train_time:5594058ms step_avg:2119.76ms step:2650/3242 train_loss:3.5836 train_time:5596178ms step_avg:2119.76ms step:2651/3242 train_loss:3.3644 train_time:5598301ms step_avg:2119.77ms step:2652/3242 train_loss:3.5077 train_time:5600417ms step_avg:2119.76ms step:2653/3242 train_loss:3.4404 train_time:5602538ms step_avg:2119.76ms step:2654/3242 train_loss:3.4557 train_time:5604655ms step_avg:2119.76ms step:2655/3242 train_loss:3.1686 train_time:5606774ms step_avg:2119.76ms step:2656/3242 train_loss:3.2994 train_time:5608898ms step_avg:2119.77ms step:2657/3242 train_loss:3.3139 train_time:5611017ms step_avg:2119.76ms step:2658/3242 train_loss:3.3062 train_time:5613134ms step_avg:2119.76ms step:2659/3242 train_loss:3.2552 train_time:5615250ms step_avg:2119.76ms step:2660/3242 train_loss:3.3566 train_time:5617374ms step_avg:2119.76ms step:2661/3242 train_loss:3.3625 train_time:5619494ms step_avg:2119.76ms step:2662/3242 train_loss:3.2118 train_time:5621609ms step_avg:2119.76ms step:2663/3242 train_loss:3.0232 train_time:5623732ms step_avg:2119.76ms step:2664/3242 train_loss:3.3309 train_time:5625841ms step_avg:2119.76ms step:2665/3242 train_loss:3.3498 train_time:5627969ms step_avg:2119.76ms step:2666/3242 train_loss:3.3166 train_time:5630087ms step_avg:2119.76ms step:2667/3242 train_loss:3.2098 train_time:5632336ms step_avg:2119.81ms step:2668/3242 train_loss:3.4284 train_time:5634460ms step_avg:2119.81ms step:2669/3242 train_loss:3.4173 train_time:5636580ms step_avg:2119.81ms step:2670/3242 train_loss:3.3304 train_time:5638698ms step_avg:2119.81ms step:2671/3242 train_loss:3.2465 train_time:5640834ms step_avg:2119.82ms step:2672/3242 train_loss:3.5025 train_time:5642951ms step_avg:2119.82ms step:2673/3242 train_loss:3.3231 train_time:5645064ms step_avg:2119.81ms step:2674/3242 train_loss:3.1600 train_time:5647179ms step_avg:2119.81ms step:2675/3242 train_loss:3.4602 train_time:5649301ms step_avg:2119.81ms step:2676/3242 train_loss:3.2840 train_time:5651420ms step_avg:2119.81ms step:2677/3242 train_loss:3.4460 train_time:5653540ms step_avg:2119.81ms step:2678/3242 train_loss:3.2974 train_time:5655665ms step_avg:2119.81ms step:2679/3242 train_loss:3.2504 train_time:5657790ms step_avg:2119.82ms step:2680/3242 train_loss:3.3553 train_time:5659905ms step_avg:2119.81ms step:2681/3242 train_loss:3.1711 train_time:5662014ms step_avg:2119.81ms step:2682/3242 train_loss:3.6416 train_time:5664138ms step_avg:2119.81ms step:2683/3242 train_loss:3.1858 train_time:5666260ms step_avg:2119.81ms step:2684/3242 train_loss:3.2146 train_time:5668375ms step_avg:2119.81ms step:2685/3242 train_loss:3.9691 train_time:5670498ms step_avg:2119.81ms step:2686/3242 train_loss:3.5317 train_time:5672611ms step_avg:2119.81ms step:2687/3242 train_loss:3.2326 train_time:5674731ms step_avg:2119.81ms step:2688/3242 train_loss:3.3651 train_time:5676855ms step_avg:2119.81ms step:2689/3242 train_loss:3.4422 train_time:5678969ms step_avg:2119.81ms step:2690/3242 train_loss:3.4255 train_time:5681090ms step_avg:2119.81ms step:2691/3242 train_loss:3.4213 train_time:5683212ms step_avg:2119.81ms step:2692/3242 train_loss:3.5566 train_time:5685330ms step_avg:2119.81ms step:2693/3242 train_loss:3.3279 train_time:5687451ms step_avg:2119.81ms step:2694/3242 train_loss:3.1941 train_time:5689566ms step_avg:2119.81ms step:2695/3242 train_loss:3.6920 train_time:5691689ms step_avg:2119.81ms step:2696/3242 train_loss:3.2931 train_time:5693809ms step_avg:2119.81ms step:2697/3242 train_loss:3.3598 train_time:5695929ms step_avg:2119.81ms step:2698/3242 train_loss:3.3478 train_time:5698047ms step_avg:2119.81ms step:2699/3242 train_loss:3.3328 train_time:5700161ms step_avg:2119.81ms step:2700/3242 train_loss:3.1623 train_time:5702284ms step_avg:2119.81ms step:2701/3242 train_loss:3.2491 train_time:5704403ms step_avg:2119.81ms step:2702/3242 train_loss:3.2573 train_time:5706522ms step_avg:2119.81ms step:2703/3242 train_loss:3.3734 train_time:5708643ms step_avg:2119.81ms step:2704/3242 train_loss:3.3853 train_time:5710766ms step_avg:2119.81ms step:2705/3242 train_loss:3.3590 train_time:5712880ms step_avg:2119.81ms step:2706/3242 train_loss:3.7085 train_time:5714997ms step_avg:2119.81ms step:2707/3242 train_loss:3.3417 train_time:5717111ms step_avg:2119.80ms step:2708/3242 train_loss:3.4201 train_time:5719225ms step_avg:2119.80ms step:2709/3242 train_loss:3.2609 train_time:5721345ms step_avg:2119.80ms step:2710/3242 train_loss:3.2755 train_time:5723464ms step_avg:2119.80ms step:2711/3242 train_loss:3.3978 train_time:5725576ms step_avg:2119.80ms step:2712/3242 train_loss:3.2871 train_time:5727694ms step_avg:2119.80ms step:2713/3242 train_loss:3.5066 train_time:5729820ms step_avg:2119.80ms step:2714/3242 train_loss:3.3273 train_time:5731936ms step_avg:2119.80ms step:2715/3242 train_loss:3.1108 train_time:5734060ms step_avg:2119.80ms step:2716/3242 train_loss:3.4645 train_time:5736179ms step_avg:2119.80ms step:2717/3242 train_loss:3.5783 train_time:5738296ms step_avg:2119.80ms step:2718/3242 train_loss:3.5315 train_time:5740409ms step_avg:2119.80ms step:2719/3242 train_loss:3.5559 train_time:5742532ms step_avg:2119.80ms step:2720/3242 train_loss:3.4227 train_time:5744649ms step_avg:2119.80ms step:2721/3242 train_loss:3.3299 train_time:5746770ms step_avg:2119.80ms step:2722/3242 train_loss:3.2123 train_time:5748896ms step_avg:2119.80ms step:2723/3242 train_loss:3.1227 train_time:5751012ms step_avg:2119.80ms step:2724/3242 train_loss:3.2683 train_time:5753130ms step_avg:2119.80ms step:2725/3242 train_loss:4.0355 train_time:5755248ms step_avg:2119.80ms step:2726/3242 train_loss:4.2679 train_time:5757366ms step_avg:2119.80ms step:2727/3242 train_loss:3.2432 train_time:5759490ms step_avg:2119.80ms step:2728/3242 train_loss:3.3761 train_time:5761610ms step_avg:2119.80ms step:2729/3242 train_loss:3.3419 train_time:5763720ms step_avg:2119.79ms step:2730/3242 train_loss:3.3791 train_time:5765838ms step_avg:2119.79ms step:2731/3242 train_loss:3.4337 train_time:5767960ms step_avg:2119.79ms step:2732/3242 train_loss:3.2469 train_time:5770081ms step_avg:2119.79ms step:2733/3242 train_loss:3.3649 train_time:5772197ms step_avg:2119.79ms step:2734/3242 train_loss:3.3464 train_time:5774316ms step_avg:2119.79ms step:2735/3242 train_loss:3.2338 train_time:5776436ms step_avg:2119.79ms step:2736/3242 train_loss:3.3051 train_time:5778558ms step_avg:2119.79ms step:2737/3242 train_loss:3.2963 train_time:5780679ms step_avg:2119.79ms step:2738/3242 train_loss:3.2521 train_time:5782794ms step_avg:2119.79ms step:2739/3242 train_loss:3.3969 train_time:5784913ms step_avg:2119.79ms step:2740/3242 train_loss:3.7157 train_time:5787037ms step_avg:2119.79ms step:2741/3242 train_loss:3.2414 train_time:5789152ms step_avg:2119.79ms step:2742/3242 train_loss:3.3609 train_time:5791267ms step_avg:2119.79ms step:2743/3242 train_loss:3.1502 train_time:5793385ms step_avg:2119.79ms step:2744/3242 train_loss:3.6301 train_time:5795507ms step_avg:2119.79ms step:2745/3242 train_loss:3.2407 train_time:5797629ms step_avg:2119.79ms step:2746/3242 train_loss:3.3231 train_time:5799740ms step_avg:2119.79ms step:2747/3242 train_loss:3.3985 train_time:5801857ms step_avg:2119.79ms step:2748/3242 train_loss:3.3098 train_time:5803971ms step_avg:2119.78ms step:2749/3242 train_loss:3.2774 train_time:5806093ms step_avg:2119.79ms step:2750/3242 train_loss:3.4929 train_time:5808210ms step_avg:2119.78ms step:2750/3242 val_loss:3.3382 train_time:5808622ms step_avg:2119.94ms step:2751/3242 train_loss:3.3563 train_time:5810336ms step_avg:2119.79ms step:2752/3242 train_loss:3.1683 train_time:5812450ms step_avg:2119.78ms step:2753/3242 train_loss:3.3440 train_time:5814569ms step_avg:2119.78ms step:2754/3242 train_loss:3.0866 train_time:5816686ms step_avg:2119.78ms step:2755/3242 train_loss:3.5071 train_time:5818812ms step_avg:2119.79ms step:2756/3242 train_loss:3.5356 train_time:5820926ms step_avg:2119.78ms step:2757/3242 train_loss:3.3653 train_time:5823049ms step_avg:2119.78ms step:2758/3242 train_loss:3.1052 train_time:5825165ms step_avg:2119.78ms step:2759/3242 train_loss:3.2979 train_time:5827275ms step_avg:2119.78ms step:2760/3242 train_loss:3.4185 train_time:5829392ms step_avg:2119.78ms step:2761/3242 train_loss:3.3043 train_time:5831510ms step_avg:2119.78ms step:2762/3242 train_loss:3.1873 train_time:5833624ms step_avg:2119.78ms step:2763/3242 train_loss:3.2711 train_time:5835744ms step_avg:2119.78ms step:2764/3242 train_loss:3.4488 train_time:5837863ms step_avg:2119.78ms step:2765/3242 train_loss:3.1292 train_time:5839985ms step_avg:2119.78ms step:2766/3242 train_loss:3.6651 train_time:5842103ms step_avg:2119.78ms step:2767/3242 train_loss:3.1282 train_time:5844222ms step_avg:2119.78ms step:2768/3242 train_loss:3.3620 train_time:5846342ms step_avg:2119.78ms step:2769/3242 train_loss:3.3332 train_time:5848457ms step_avg:2119.77ms step:2770/3242 train_loss:3.3339 train_time:5850581ms step_avg:2119.78ms step:2771/3242 train_loss:3.1505 train_time:5852695ms step_avg:2119.77ms step:2772/3242 train_loss:3.4535 train_time:5854816ms step_avg:2119.77ms step:2773/3242 train_loss:3.2274 train_time:5856940ms step_avg:2119.78ms step:2774/3242 train_loss:3.2750 train_time:5859053ms step_avg:2119.77ms step:2775/3242 train_loss:3.2748 train_time:5861180ms step_avg:2119.78ms step:2776/3242 train_loss:3.3396 train_time:5863302ms step_avg:2119.78ms step:2777/3242 train_loss:3.2455 train_time:5865423ms step_avg:2119.78ms step:2778/3242 train_loss:3.4131 train_time:5867532ms step_avg:2119.77ms step:2779/3242 train_loss:3.4299 train_time:5869655ms step_avg:2119.77ms step:2780/3242 train_loss:3.3615 train_time:5871773ms step_avg:2119.77ms step:2781/3242 train_loss:3.3939 train_time:5873887ms step_avg:2119.77ms step:2782/3242 train_loss:3.4685 train_time:5876012ms step_avg:2119.77ms step:2783/3242 train_loss:3.3451 train_time:5878125ms step_avg:2119.77ms step:2784/3242 train_loss:3.6689 train_time:5880250ms step_avg:2119.77ms step:2785/3242 train_loss:2.9960 train_time:5882369ms step_avg:2119.77ms step:2786/3242 train_loss:3.2932 train_time:5884491ms step_avg:2119.77ms step:2787/3242 train_loss:3.3841 train_time:5886611ms step_avg:2119.77ms step:2788/3242 train_loss:3.2215 train_time:5888724ms step_avg:2119.77ms step:2789/3242 train_loss:3.2694 train_time:5890845ms step_avg:2119.77ms step:2790/3242 train_loss:3.2918 train_time:5892961ms step_avg:2119.77ms step:2791/3242 train_loss:3.3107 train_time:5895087ms step_avg:2119.77ms step:2792/3242 train_loss:3.4870 train_time:5897208ms step_avg:2119.77ms step:2793/3242 train_loss:3.2279 train_time:5899321ms step_avg:2119.77ms step:2794/3242 train_loss:3.3718 train_time:5901441ms step_avg:2119.77ms step:2795/3242 train_loss:3.3924 train_time:5903556ms step_avg:2119.77ms step:2796/3242 train_loss:3.7596 train_time:5905685ms step_avg:2119.77ms step:2797/3242 train_loss:3.3330 train_time:5907805ms step_avg:2119.77ms step:2798/3242 train_loss:3.2704 train_time:5909919ms step_avg:2119.77ms step:2799/3242 train_loss:3.2872 train_time:5912036ms step_avg:2119.77ms step:2800/3242 train_loss:3.4041 train_time:5914159ms step_avg:2119.77ms step:2801/3242 train_loss:3.2599 train_time:5916277ms step_avg:2119.77ms step:2802/3242 train_loss:3.0748 train_time:5918397ms step_avg:2119.77ms step:2803/3242 train_loss:3.4697 train_time:5920519ms step_avg:2119.77ms step:2804/3242 train_loss:3.3954 train_time:5922635ms step_avg:2119.77ms step:2805/3242 train_loss:3.3316 train_time:5924752ms step_avg:2119.77ms step:2806/3242 train_loss:3.2538 train_time:5926873ms step_avg:2119.77ms step:2807/3242 train_loss:3.5196 train_time:5928995ms step_avg:2119.77ms step:2808/3242 train_loss:3.2324 train_time:5931116ms step_avg:2119.77ms step:2809/3242 train_loss:3.4950 train_time:5933245ms step_avg:2119.77ms step:2810/3242 train_loss:3.3470 train_time:5935360ms step_avg:2119.77ms step:2811/3242 train_loss:3.5355 train_time:5937480ms step_avg:2119.77ms step:2812/3242 train_loss:3.0544 train_time:5939604ms step_avg:2119.77ms step:2813/3242 train_loss:3.3896 train_time:5941719ms step_avg:2119.77ms step:2814/3242 train_loss:3.6650 train_time:5943841ms step_avg:2119.77ms step:2815/3242 train_loss:3.6735 train_time:5945955ms step_avg:2119.77ms step:2816/3242 train_loss:3.2790 train_time:5948075ms step_avg:2119.77ms step:2817/3242 train_loss:3.2713 train_time:5950193ms step_avg:2119.77ms step:2818/3242 train_loss:3.5066 train_time:5952315ms step_avg:2119.77ms step:2819/3242 train_loss:3.3986 train_time:5954429ms step_avg:2119.77ms step:2820/3242 train_loss:3.5911 train_time:5956547ms step_avg:2119.77ms step:2821/3242 train_loss:3.4309 train_time:5958664ms step_avg:2119.77ms step:2822/3242 train_loss:3.3984 train_time:5960775ms step_avg:2119.76ms step:2823/3242 train_loss:3.3446 train_time:5962891ms step_avg:2119.76ms step:2824/3242 train_loss:3.3862 train_time:5965018ms step_avg:2119.76ms step:2825/3242 train_loss:3.5183 train_time:5967136ms step_avg:2119.76ms step:2826/3242 train_loss:3.2345 train_time:5969257ms step_avg:2119.76ms step:2827/3242 train_loss:3.1691 train_time:5971373ms step_avg:2119.76ms step:2828/3242 train_loss:3.3255 train_time:5973489ms step_avg:2119.76ms step:2829/3242 train_loss:3.2482 train_time:5975611ms step_avg:2119.76ms step:2830/3242 train_loss:3.3628 train_time:5977729ms step_avg:2119.76ms step:2831/3242 train_loss:3.2587 train_time:5979851ms step_avg:2119.76ms step:2832/3242 train_loss:3.3023 train_time:5981974ms step_avg:2119.76ms step:2833/3242 train_loss:3.4771 train_time:5984085ms step_avg:2119.76ms step:2834/3242 train_loss:3.2517 train_time:5986207ms step_avg:2119.76ms step:2835/3242 train_loss:3.1804 train_time:5988323ms step_avg:2119.76ms step:2836/3242 train_loss:3.7603 train_time:5990442ms step_avg:2119.76ms step:2837/3242 train_loss:3.3705 train_time:5992565ms step_avg:2119.76ms step:2838/3242 train_loss:3.7688 train_time:5994680ms step_avg:2119.76ms step:2839/3242 train_loss:3.2815 train_time:5996798ms step_avg:2119.76ms step:2840/3242 train_loss:3.0609 train_time:5998913ms step_avg:2119.76ms step:2841/3242 train_loss:3.2857 train_time:6001031ms step_avg:2119.76ms step:2842/3242 train_loss:3.0669 train_time:6003145ms step_avg:2119.75ms step:2843/3242 train_loss:2.9614 train_time:6005258ms step_avg:2119.75ms step:2844/3242 train_loss:3.3177 train_time:6007378ms step_avg:2119.75ms step:2845/3242 train_loss:3.4875 train_time:6009497ms step_avg:2119.75ms step:2846/3242 train_loss:3.0735 train_time:6011618ms step_avg:2119.75ms step:2847/3242 train_loss:3.2884 train_time:6013733ms step_avg:2119.75ms step:2848/3242 train_loss:3.4004 train_time:6015859ms step_avg:2119.75ms step:2849/3242 train_loss:3.2786 train_time:6017974ms step_avg:2119.75ms step:2850/3242 train_loss:3.6624 train_time:6020099ms step_avg:2119.75ms step:2851/3242 train_loss:3.3932 train_time:6022221ms step_avg:2119.75ms step:2852/3242 train_loss:3.2952 train_time:6024337ms step_avg:2119.75ms step:2853/3242 train_loss:3.1740 train_time:6026462ms step_avg:2119.75ms step:2854/3242 train_loss:3.4881 train_time:6028578ms step_avg:2119.75ms step:2855/3242 train_loss:3.5567 train_time:6030693ms step_avg:2119.75ms step:2856/3242 train_loss:3.2968 train_time:6032810ms step_avg:2119.75ms step:2857/3242 train_loss:3.2442 train_time:6034934ms step_avg:2119.75ms step:2858/3242 train_loss:3.2125 train_time:6037187ms step_avg:2119.80ms step:2859/3242 train_loss:3.2651 train_time:6039294ms step_avg:2119.79ms step:2860/3242 train_loss:3.3710 train_time:6041417ms step_avg:2119.80ms step:2861/3242 train_loss:3.2825 train_time:6043537ms step_avg:2119.80ms step:2862/3242 train_loss:3.3492 train_time:6045652ms step_avg:2119.79ms step:2863/3242 train_loss:3.2442 train_time:6047766ms step_avg:2119.79ms step:2864/3242 train_loss:3.4141 train_time:6049887ms step_avg:2119.79ms step:2865/3242 train_loss:2.9505 train_time:6052008ms step_avg:2119.79ms step:2866/3242 train_loss:3.4178 train_time:6054128ms step_avg:2119.79ms step:2867/3242 train_loss:3.2724 train_time:6056244ms step_avg:2119.79ms step:2868/3242 train_loss:3.5048 train_time:6058364ms step_avg:2119.79ms step:2869/3242 train_loss:3.2459 train_time:6060485ms step_avg:2119.79ms step:2870/3242 train_loss:3.5320 train_time:6062602ms step_avg:2119.79ms step:2871/3242 train_loss:3.4724 train_time:6064727ms step_avg:2119.79ms step:2872/3242 train_loss:3.4740 train_time:6066850ms step_avg:2119.79ms step:2873/3242 train_loss:3.4703 train_time:6068965ms step_avg:2119.79ms step:2874/3242 train_loss:3.4484 train_time:6071083ms step_avg:2119.79ms step:2875/3242 train_loss:3.3000 train_time:6073198ms step_avg:2119.79ms step:2875/3242 val_loss:3.3181 train_time:6073611ms step_avg:2119.93ms step:2876/3242 train_loss:3.2756 train_time:6075324ms step_avg:2119.79ms step:2877/3242 train_loss:3.4287 train_time:6077446ms step_avg:2119.79ms step:2878/3242 train_loss:3.3138 train_time:6079554ms step_avg:2119.79ms step:2879/3242 train_loss:3.3499 train_time:6081668ms step_avg:2119.79ms step:2880/3242 train_loss:3.0601 train_time:6083794ms step_avg:2119.79ms step:2881/3242 train_loss:3.1318 train_time:6085910ms step_avg:2119.79ms step:2882/3242 train_loss:3.1334 train_time:6088028ms step_avg:2119.79ms step:2883/3242 train_loss:3.2360 train_time:6090144ms step_avg:2119.79ms step:2884/3242 train_loss:3.1943 train_time:6092272ms step_avg:2119.79ms step:2885/3242 train_loss:3.4012 train_time:6094393ms step_avg:2119.79ms step:2886/3242 train_loss:3.3008 train_time:6096502ms step_avg:2119.79ms step:2887/3242 train_loss:3.3891 train_time:6098618ms step_avg:2119.78ms step:2888/3242 train_loss:3.3827 train_time:6100738ms step_avg:2119.78ms step:2889/3242 train_loss:3.2798 train_time:6102854ms step_avg:2119.78ms step:2890/3242 train_loss:3.4059 train_time:6104968ms step_avg:2119.78ms step:2891/3242 train_loss:3.2695 train_time:6107077ms step_avg:2119.78ms step:2892/3242 train_loss:3.3054 train_time:6109198ms step_avg:2119.78ms step:2893/3242 train_loss:3.2896 train_time:6111317ms step_avg:2119.78ms step:2894/3242 train_loss:3.2487 train_time:6113436ms step_avg:2119.78ms step:2895/3242 train_loss:3.4751 train_time:6115556ms step_avg:2119.78ms step:2896/3242 train_loss:3.6082 train_time:6117679ms step_avg:2119.78ms step:2897/3242 train_loss:3.1609 train_time:6119799ms step_avg:2119.78ms step:2898/3242 train_loss:3.3134 train_time:6121924ms step_avg:2119.78ms step:2899/3242 train_loss:3.3128 train_time:6124045ms step_avg:2119.78ms step:2900/3242 train_loss:3.2186 train_time:6126154ms step_avg:2119.78ms step:2901/3242 train_loss:3.3696 train_time:6128275ms step_avg:2119.78ms step:2902/3242 train_loss:3.3050 train_time:6130395ms step_avg:2119.78ms step:2903/3242 train_loss:3.5851 train_time:6132522ms step_avg:2119.78ms step:2904/3242 train_loss:3.1864 train_time:6134641ms step_avg:2119.78ms step:2905/3242 train_loss:3.4421 train_time:6136763ms step_avg:2119.78ms step:2906/3242 train_loss:3.0832 train_time:6138870ms step_avg:2119.78ms step:2907/3242 train_loss:3.4780 train_time:6140991ms step_avg:2119.78ms step:2908/3242 train_loss:3.4978 train_time:6143105ms step_avg:2119.77ms step:2909/3242 train_loss:3.1690 train_time:6145224ms step_avg:2119.77ms step:2910/3242 train_loss:3.3113 train_time:6147345ms step_avg:2119.77ms step:2911/3242 train_loss:3.2978 train_time:6149468ms step_avg:2119.78ms step:2912/3242 train_loss:3.4161 train_time:6151593ms step_avg:2119.78ms step:2913/3242 train_loss:3.1410 train_time:6153706ms step_avg:2119.77ms step:2914/3242 train_loss:3.3610 train_time:6155823ms step_avg:2119.77ms step:2915/3242 train_loss:3.1954 train_time:6157946ms step_avg:2119.77ms step:2916/3242 train_loss:3.4185 train_time:6160068ms step_avg:2119.78ms step:2917/3242 train_loss:2.9906 train_time:6162192ms step_avg:2119.78ms step:2918/3242 train_loss:3.4247 train_time:6164299ms step_avg:2119.77ms step:2919/3242 train_loss:3.3535 train_time:6166421ms step_avg:2119.77ms step:2920/3242 train_loss:3.1851 train_time:6168539ms step_avg:2119.77ms step:2921/3242 train_loss:3.3948 train_time:6170657ms step_avg:2119.77ms step:2922/3242 train_loss:3.2337 train_time:6172778ms step_avg:2119.77ms step:2923/3242 train_loss:3.3096 train_time:6174894ms step_avg:2119.77ms step:2924/3242 train_loss:3.2365 train_time:6177013ms step_avg:2119.77ms step:2925/3242 train_loss:3.3470 train_time:6179132ms step_avg:2119.77ms step:2926/3242 train_loss:3.3529 train_time:6181250ms step_avg:2119.77ms step:2927/3242 train_loss:3.2265 train_time:6183374ms step_avg:2119.77ms step:2928/3242 train_loss:3.2512 train_time:6185487ms step_avg:2119.77ms step:2929/3242 train_loss:3.3691 train_time:6187603ms step_avg:2119.77ms step:2930/3242 train_loss:3.2340 train_time:6189719ms step_avg:2119.77ms step:2931/3242 train_loss:3.3486 train_time:6191837ms step_avg:2119.77ms step:2932/3242 train_loss:3.4340 train_time:6193961ms step_avg:2119.77ms step:2933/3242 train_loss:3.3363 train_time:6196076ms step_avg:2119.77ms step:2934/3242 train_loss:3.5248 train_time:6198197ms step_avg:2119.77ms step:2935/3242 train_loss:3.3262 train_time:6200312ms step_avg:2119.76ms step:2936/3242 train_loss:3.2616 train_time:6202431ms step_avg:2119.76ms step:2937/3242 train_loss:3.4063 train_time:6204550ms step_avg:2119.76ms step:2938/3242 train_loss:3.2609 train_time:6206665ms step_avg:2119.76ms step:2939/3242 train_loss:3.8417 train_time:6208783ms step_avg:2119.76ms step:2940/3242 train_loss:3.4407 train_time:6210905ms step_avg:2119.76ms step:2941/3242 train_loss:3.3522 train_time:6213026ms step_avg:2119.76ms step:2942/3242 train_loss:3.3714 train_time:6215142ms step_avg:2119.76ms step:2943/3242 train_loss:3.1978 train_time:6217268ms step_avg:2119.76ms step:2944/3242 train_loss:3.2651 train_time:6219387ms step_avg:2119.76ms step:2945/3242 train_loss:3.3982 train_time:6221508ms step_avg:2119.76ms step:2946/3242 train_loss:3.2032 train_time:6223625ms step_avg:2119.76ms step:2947/3242 train_loss:3.2238 train_time:6225762ms step_avg:2119.77ms step:2948/3242 train_loss:3.4100 train_time:6227881ms step_avg:2119.77ms step:2949/3242 train_loss:3.2938 train_time:6229991ms step_avg:2119.77ms step:2950/3242 train_loss:3.0262 train_time:6232114ms step_avg:2119.77ms step:2951/3242 train_loss:3.2638 train_time:6234240ms step_avg:2119.77ms step:2952/3242 train_loss:3.3472 train_time:6236356ms step_avg:2119.77ms step:2953/3242 train_loss:3.3733 train_time:6238475ms step_avg:2119.77ms step:2954/3242 train_loss:3.3392 train_time:6240593ms step_avg:2119.77ms step:2955/3242 train_loss:3.3497 train_time:6242709ms step_avg:2119.77ms step:2956/3242 train_loss:3.3606 train_time:6244827ms step_avg:2119.76ms step:2957/3242 train_loss:3.3615 train_time:6246942ms step_avg:2119.76ms step:2958/3242 train_loss:3.2681 train_time:6249067ms step_avg:2119.76ms step:2959/3242 train_loss:3.2512 train_time:6251184ms step_avg:2119.76ms step:2960/3242 train_loss:3.0898 train_time:6253304ms step_avg:2119.76ms step:2961/3242 train_loss:3.3322 train_time:6255422ms step_avg:2119.76ms step:2962/3242 train_loss:3.2159 train_time:6257542ms step_avg:2119.76ms step:2963/3242 train_loss:3.3699 train_time:6259662ms step_avg:2119.76ms step:2964/3242 train_loss:3.4231 train_time:6261785ms step_avg:2119.76ms step:2965/3242 train_loss:3.2943 train_time:6263899ms step_avg:2119.76ms step:2966/3242 train_loss:3.5520 train_time:6266021ms step_avg:2119.76ms step:2967/3242 train_loss:3.2582 train_time:6268141ms step_avg:2119.76ms step:2968/3242 train_loss:3.4697 train_time:6270261ms step_avg:2119.76ms step:2969/3242 train_loss:3.2475 train_time:6272383ms step_avg:2119.76ms step:2970/3242 train_loss:3.2196 train_time:6274509ms step_avg:2119.77ms step:2971/3242 train_loss:3.1198 train_time:6276619ms step_avg:2119.76ms step:2972/3242 train_loss:3.4165 train_time:6278745ms step_avg:2119.77ms step:2973/3242 train_loss:3.1148 train_time:6280860ms step_avg:2119.76ms step:2974/3242 train_loss:3.9623 train_time:6282979ms step_avg:2119.76ms step:2975/3242 train_loss:3.2887 train_time:6285098ms step_avg:2119.76ms step:2976/3242 train_loss:3.1509 train_time:6287220ms step_avg:2119.76ms step:2977/3242 train_loss:3.4488 train_time:6289335ms step_avg:2119.76ms step:2978/3242 train_loss:3.4397 train_time:6291467ms step_avg:2119.77ms step:2979/3242 train_loss:3.0679 train_time:6293583ms step_avg:2119.77ms step:2980/3242 train_loss:3.1608 train_time:6295698ms step_avg:2119.76ms step:2981/3242 train_loss:3.2256 train_time:6297826ms step_avg:2119.77ms step:2982/3242 train_loss:3.2498 train_time:6299936ms step_avg:2119.76ms step:2983/3242 train_loss:2.9359 train_time:6302060ms step_avg:2119.76ms step:2984/3242 train_loss:3.5974 train_time:6304172ms step_avg:2119.76ms step:2985/3242 train_loss:3.2125 train_time:6306294ms step_avg:2119.76ms step:2986/3242 train_loss:3.5671 train_time:6308422ms step_avg:2119.77ms step:2987/3242 train_loss:3.3421 train_time:6310539ms step_avg:2119.76ms step:2988/3242 train_loss:3.6025 train_time:6312652ms step_avg:2119.76ms step:2989/3242 train_loss:3.4006 train_time:6314771ms step_avg:2119.76ms step:2990/3242 train_loss:3.3642 train_time:6316895ms step_avg:2119.76ms step:2991/3242 train_loss:3.0409 train_time:6319007ms step_avg:2119.76ms step:2992/3242 train_loss:3.5225 train_time:6321124ms step_avg:2119.76ms step:2993/3242 train_loss:3.2448 train_time:6323244ms step_avg:2119.76ms step:2994/3242 train_loss:3.5730 train_time:6325354ms step_avg:2119.76ms step:2995/3242 train_loss:3.1875 train_time:6327470ms step_avg:2119.76ms step:2996/3242 train_loss:3.4517 train_time:6329590ms step_avg:2119.76ms step:2997/3242 train_loss:3.3433 train_time:6331706ms step_avg:2119.75ms step:2998/3242 train_loss:3.2675 train_time:6333824ms step_avg:2119.75ms step:2999/3242 train_loss:3.1222 train_time:6335940ms step_avg:2119.75ms step:3000/3242 train_loss:3.8397 train_time:6338064ms step_avg:2119.75ms step:3000/3242 val_loss:3.3002 train_time:6338477ms step_avg:2119.89ms step:3001/3242 train_loss:3.0700 train_time:6340192ms step_avg:2119.76ms step:3002/3242 train_loss:3.2138 train_time:6342301ms step_avg:2119.75ms step:3003/3242 train_loss:2.9899 train_time:6344417ms step_avg:2119.75ms step:3004/3242 train_loss:3.2510 train_time:6346541ms step_avg:2119.75ms step:3005/3242 train_loss:3.3430 train_time:6348656ms step_avg:2119.75ms step:3006/3242 train_loss:3.1821 train_time:6350780ms step_avg:2119.75ms step:3007/3242 train_loss:3.4596 train_time:6352901ms step_avg:2119.75ms step:3008/3242 train_loss:3.1350 train_time:6355012ms step_avg:2119.75ms step:3009/3242 train_loss:3.2444 train_time:6357136ms step_avg:2119.75ms step:3010/3242 train_loss:3.1102 train_time:6359257ms step_avg:2119.75ms step:3011/3242 train_loss:3.3098 train_time:6361378ms step_avg:2119.75ms step:3012/3242 train_loss:3.3694 train_time:6363493ms step_avg:2119.75ms step:3013/3242 train_loss:3.2723 train_time:6365610ms step_avg:2119.75ms step:3014/3242 train_loss:2.9779 train_time:6367733ms step_avg:2119.75ms step:3015/3242 train_loss:3.4942 train_time:6369855ms step_avg:2119.75ms step:3016/3242 train_loss:3.4860 train_time:6371975ms step_avg:2119.75ms step:3017/3242 train_loss:3.3694 train_time:6374085ms step_avg:2119.75ms step:3018/3242 train_loss:3.1293 train_time:6376212ms step_avg:2119.75ms step:3019/3242 train_loss:3.0690 train_time:6378327ms step_avg:2119.75ms step:3020/3242 train_loss:3.1977 train_time:6380445ms step_avg:2119.75ms step:3021/3242 train_loss:3.3916 train_time:6382571ms step_avg:2119.75ms step:3022/3242 train_loss:3.1974 train_time:6384696ms step_avg:2119.75ms step:3023/3242 train_loss:3.1839 train_time:6386812ms step_avg:2119.75ms step:3024/3242 train_loss:3.4033 train_time:6388923ms step_avg:2119.75ms step:3025/3242 train_loss:3.5668 train_time:6391041ms step_avg:2119.75ms step:3026/3242 train_loss:3.2817 train_time:6393163ms step_avg:2119.75ms step:3027/3242 train_loss:3.7684 train_time:6395288ms step_avg:2119.75ms step:3028/3242 train_loss:3.2211 train_time:6397398ms step_avg:2119.75ms step:3029/3242 train_loss:3.0785 train_time:6399521ms step_avg:2119.75ms step:3030/3242 train_loss:3.1225 train_time:6401642ms step_avg:2119.75ms step:3031/3242 train_loss:3.6003 train_time:6403764ms step_avg:2119.75ms step:3032/3242 train_loss:3.2711 train_time:6405880ms step_avg:2119.75ms step:3033/3242 train_loss:3.5171 train_time:6408003ms step_avg:2119.75ms step:3034/3242 train_loss:3.1818 train_time:6410115ms step_avg:2119.75ms step:3035/3242 train_loss:2.9355 train_time:6412232ms step_avg:2119.75ms step:3036/3242 train_loss:3.3554 train_time:6414355ms step_avg:2119.75ms step:3037/3242 train_loss:3.0546 train_time:6416474ms step_avg:2119.75ms step:3038/3242 train_loss:2.9749 train_time:6418591ms step_avg:2119.75ms step:3039/3242 train_loss:3.5151 train_time:6420713ms step_avg:2119.75ms step:3040/3242 train_loss:3.7323 train_time:6422838ms step_avg:2119.75ms step:3041/3242 train_loss:3.0899 train_time:6424955ms step_avg:2119.75ms step:3042/3242 train_loss:3.1911 train_time:6427070ms step_avg:2119.75ms step:3043/3242 train_loss:3.3358 train_time:6429193ms step_avg:2119.75ms step:3044/3242 train_loss:3.0740 train_time:6431305ms step_avg:2119.74ms step:3045/3242 train_loss:3.3018 train_time:6433427ms step_avg:2119.75ms step:3046/3242 train_loss:3.2639 train_time:6435547ms step_avg:2119.75ms step:3047/3242 train_loss:3.4696 train_time:6437663ms step_avg:2119.74ms step:3048/3242 train_loss:3.6329 train_time:6439917ms step_avg:2119.79ms step:3049/3242 train_loss:3.2149 train_time:6442043ms step_avg:2119.79ms step:3050/3242 train_loss:3.1968 train_time:6444170ms step_avg:2119.79ms step:3051/3242 train_loss:3.3361 train_time:6446285ms step_avg:2119.79ms step:3052/3242 train_loss:3.3689 train_time:6448402ms step_avg:2119.79ms step:3053/3242 train_loss:3.3234 train_time:6450532ms step_avg:2119.79ms step:3054/3242 train_loss:3.1707 train_time:6452643ms step_avg:2119.79ms step:3055/3242 train_loss:3.4597 train_time:6454762ms step_avg:2119.79ms step:3056/3242 train_loss:3.4737 train_time:6456879ms step_avg:2119.79ms step:3057/3242 train_loss:3.1278 train_time:6458999ms step_avg:2119.79ms step:3058/3242 train_loss:3.2033 train_time:6461123ms step_avg:2119.79ms step:3059/3242 train_loss:3.2685 train_time:6463235ms step_avg:2119.79ms step:3060/3242 train_loss:3.2419 train_time:6465355ms step_avg:2119.79ms step:3061/3242 train_loss:3.2403 train_time:6467472ms step_avg:2119.79ms step:3062/3242 train_loss:3.5228 train_time:6469595ms step_avg:2119.79ms step:3063/3242 train_loss:3.1432 train_time:6471713ms step_avg:2119.79ms step:3064/3242 train_loss:3.3952 train_time:6473838ms step_avg:2119.79ms step:3065/3242 train_loss:3.2318 train_time:6475956ms step_avg:2119.79ms step:3066/3242 train_loss:3.1761 train_time:6478073ms step_avg:2119.79ms step:3067/3242 train_loss:3.2692 train_time:6480191ms step_avg:2119.79ms step:3068/3242 train_loss:3.3111 train_time:6482313ms step_avg:2119.79ms step:3069/3242 train_loss:3.2347 train_time:6484440ms step_avg:2119.79ms step:3070/3242 train_loss:3.3321 train_time:6486557ms step_avg:2119.79ms step:3071/3242 train_loss:3.4997 train_time:6488673ms step_avg:2119.79ms step:3072/3242 train_loss:3.0657 train_time:6490791ms step_avg:2119.79ms step:3073/3242 train_loss:3.3840 train_time:6492912ms step_avg:2119.79ms step:3074/3242 train_loss:3.2784 train_time:6495034ms step_avg:2119.79ms step:3075/3242 train_loss:3.4845 train_time:6497154ms step_avg:2119.79ms step:3076/3242 train_loss:3.6453 train_time:6499268ms step_avg:2119.79ms step:3077/3242 train_loss:3.5815 train_time:6501394ms step_avg:2119.79ms step:3078/3242 train_loss:3.2928 train_time:6503508ms step_avg:2119.79ms step:3079/3242 train_loss:4.0029 train_time:6505637ms step_avg:2119.79ms step:3080/3242 train_loss:3.2216 train_time:6507748ms step_avg:2119.79ms step:3081/3242 train_loss:3.4141 train_time:6509869ms step_avg:2119.79ms step:3082/3242 train_loss:3.7200 train_time:6511983ms step_avg:2119.79ms step:3083/3242 train_loss:3.2334 train_time:6514103ms step_avg:2119.79ms step:3084/3242 train_loss:3.1798 train_time:6516225ms step_avg:2119.79ms step:3085/3242 train_loss:3.4376 train_time:6518349ms step_avg:2119.79ms step:3086/3242 train_loss:3.4128 train_time:6520474ms step_avg:2119.79ms step:3087/3242 train_loss:3.2221 train_time:6522600ms step_avg:2119.79ms step:3088/3242 train_loss:3.1839 train_time:6524719ms step_avg:2119.79ms step:3089/3242 train_loss:3.0786 train_time:6526838ms step_avg:2119.79ms step:3090/3242 train_loss:3.2073 train_time:6528958ms step_avg:2119.79ms step:3091/3242 train_loss:3.8854 train_time:6531077ms step_avg:2119.79ms step:3092/3242 train_loss:3.2542 train_time:6533201ms step_avg:2119.79ms step:3093/3242 train_loss:3.7270 train_time:6535317ms step_avg:2119.79ms step:3094/3242 train_loss:3.2849 train_time:6537440ms step_avg:2119.79ms step:3095/3242 train_loss:3.3776 train_time:6539553ms step_avg:2119.79ms step:3096/3242 train_loss:3.2335 train_time:6541680ms step_avg:2119.79ms step:3097/3242 train_loss:3.5039 train_time:6543793ms step_avg:2119.79ms step:3098/3242 train_loss:3.2758 train_time:6545912ms step_avg:2119.79ms step:3099/3242 train_loss:3.3194 train_time:6548032ms step_avg:2119.79ms step:3100/3242 train_loss:3.5136 train_time:6550149ms step_avg:2119.79ms step:3101/3242 train_loss:3.2739 train_time:6552265ms step_avg:2119.79ms step:3102/3242 train_loss:3.3083 train_time:6554386ms step_avg:2119.79ms step:3103/3242 train_loss:3.0499 train_time:6556508ms step_avg:2119.79ms step:3104/3242 train_loss:3.3409 train_time:6558624ms step_avg:2119.79ms step:3105/3242 train_loss:3.0013 train_time:6560748ms step_avg:2119.79ms step:3106/3242 train_loss:3.2089 train_time:6562864ms step_avg:2119.79ms step:3107/3242 train_loss:3.2557 train_time:6564991ms step_avg:2119.79ms step:3108/3242 train_loss:3.1074 train_time:6567111ms step_avg:2119.79ms step:3109/3242 train_loss:2.9143 train_time:6569225ms step_avg:2119.79ms step:3110/3242 train_loss:3.2096 train_time:6571346ms step_avg:2119.79ms step:3111/3242 train_loss:3.2674 train_time:6573465ms step_avg:2119.79ms step:3112/3242 train_loss:3.1906 train_time:6575591ms step_avg:2119.79ms step:3113/3242 train_loss:3.4406 train_time:6577700ms step_avg:2119.79ms step:3114/3242 train_loss:3.1066 train_time:6579822ms step_avg:2119.79ms step:3115/3242 train_loss:3.2811 train_time:6581945ms step_avg:2119.79ms step:3116/3242 train_loss:3.1731 train_time:6584064ms step_avg:2119.79ms step:3117/3242 train_loss:3.1582 train_time:6586184ms step_avg:2119.79ms step:3118/3242 train_loss:3.1371 train_time:6588304ms step_avg:2119.79ms step:3119/3242 train_loss:3.1551 train_time:6590412ms step_avg:2119.79ms step:3120/3242 train_loss:3.4394 train_time:6592536ms step_avg:2119.79ms step:3121/3242 train_loss:3.1249 train_time:6594657ms step_avg:2119.79ms step:3122/3242 train_loss:3.3468 train_time:6596777ms step_avg:2119.79ms step:3123/3242 train_loss:3.2312 train_time:6598899ms step_avg:2119.79ms step:3124/3242 train_loss:3.4576 train_time:6601015ms step_avg:2119.79ms step:3125/3242 train_loss:3.1686 train_time:6603126ms step_avg:2119.78ms step:3125/3242 val_loss:3.2847 train_time:6603541ms step_avg:2119.92ms step:3126/3242 train_loss:3.4074 train_time:6605250ms step_avg:2119.79ms step:3127/3242 train_loss:3.3307 train_time:6607376ms step_avg:2119.79ms step:3128/3242 train_loss:3.0285 train_time:6609494ms step_avg:2119.79ms step:3129/3242 train_loss:3.3921 train_time:6611613ms step_avg:2119.79ms step:3130/3242 train_loss:3.3346 train_time:6613723ms step_avg:2119.78ms step:3131/3242 train_loss:3.2547 train_time:6615846ms step_avg:2119.78ms step:3132/3242 train_loss:2.9634 train_time:6617965ms step_avg:2119.78ms step:3133/3242 train_loss:2.9934 train_time:6620085ms step_avg:2119.78ms step:3134/3242 train_loss:3.4797 train_time:6622208ms step_avg:2119.78ms step:3135/3242 train_loss:3.3686 train_time:6624327ms step_avg:2119.78ms step:3136/3242 train_loss:3.4404 train_time:6626446ms step_avg:2119.78ms step:3137/3242 train_loss:3.5070 train_time:6628561ms step_avg:2119.78ms step:3138/3242 train_loss:3.3019 train_time:6630682ms step_avg:2119.78ms step:3139/3242 train_loss:3.5055 train_time:6632803ms step_avg:2119.78ms step:3140/3242 train_loss:3.4036 train_time:6634925ms step_avg:2119.78ms step:3141/3242 train_loss:3.0638 train_time:6637043ms step_avg:2119.78ms step:3142/3242 train_loss:3.3287 train_time:6639159ms step_avg:2119.78ms step:3143/3242 train_loss:3.4826 train_time:6641282ms step_avg:2119.78ms step:3144/3242 train_loss:3.3395 train_time:6643399ms step_avg:2119.78ms step:3145/3242 train_loss:3.7002 train_time:6645516ms step_avg:2119.78ms step:3146/3242 train_loss:3.0817 train_time:6647638ms step_avg:2119.78ms step:3147/3242 train_loss:3.1414 train_time:6649759ms step_avg:2119.78ms step:3148/3242 train_loss:3.0790 train_time:6651876ms step_avg:2119.78ms step:3149/3242 train_loss:3.3224 train_time:6653998ms step_avg:2119.78ms step:3150/3242 train_loss:3.1780 train_time:6656119ms step_avg:2119.78ms step:3151/3242 train_loss:3.3368 train_time:6658231ms step_avg:2119.78ms step:3152/3242 train_loss:3.2745 train_time:6660358ms step_avg:2119.78ms step:3153/3242 train_loss:3.5317 train_time:6662476ms step_avg:2119.78ms step:3154/3242 train_loss:3.7946 train_time:6664587ms step_avg:2119.78ms step:3155/3242 train_loss:3.3289 train_time:6666705ms step_avg:2119.78ms step:3156/3242 train_loss:3.5808 train_time:6668828ms step_avg:2119.78ms step:3157/3242 train_loss:3.9576 train_time:6670952ms step_avg:2119.78ms step:3158/3242 train_loss:3.3442 train_time:6673067ms step_avg:2119.78ms step:3159/3242 train_loss:2.7204 train_time:6675187ms step_avg:2119.78ms step:3160/3242 train_loss:3.3368 train_time:6677309ms step_avg:2119.78ms step:3161/3242 train_loss:3.0769 train_time:6679431ms step_avg:2119.78ms step:3162/3242 train_loss:3.3087 train_time:6681547ms step_avg:2119.78ms step:3163/3242 train_loss:3.0946 train_time:6683670ms step_avg:2119.78ms step:3164/3242 train_loss:2.9390 train_time:6685792ms step_avg:2119.78ms step:3165/3242 train_loss:3.6156 train_time:6687904ms step_avg:2119.78ms step:3166/3242 train_loss:3.4484 train_time:6690027ms step_avg:2119.78ms step:3167/3242 train_loss:3.2907 train_time:6692149ms step_avg:2119.78ms step:3168/3242 train_loss:3.2721 train_time:6694262ms step_avg:2119.78ms step:3169/3242 train_loss:3.4923 train_time:6696389ms step_avg:2119.78ms step:3170/3242 train_loss:3.2608 train_time:6698502ms step_avg:2119.78ms step:3171/3242 train_loss:3.3152 train_time:6700619ms step_avg:2119.78ms step:3172/3242 train_loss:3.1673 train_time:6702744ms step_avg:2119.78ms step:3173/3242 train_loss:3.0675 train_time:6704864ms step_avg:2119.78ms step:3174/3242 train_loss:3.1459 train_time:6706983ms step_avg:2119.78ms step:3175/3242 train_loss:3.3545 train_time:6709095ms step_avg:2119.78ms step:3176/3242 train_loss:3.1568 train_time:6711217ms step_avg:2119.78ms step:3177/3242 train_loss:3.0997 train_time:6713341ms step_avg:2119.78ms step:3178/3242 train_loss:3.3731 train_time:6715453ms step_avg:2119.78ms step:3179/3242 train_loss:3.4747 train_time:6717573ms step_avg:2119.78ms step:3180/3242 train_loss:3.5700 train_time:6719691ms step_avg:2119.78ms step:3181/3242 train_loss:3.3467 train_time:6721814ms step_avg:2119.78ms step:3182/3242 train_loss:3.2036 train_time:6723930ms step_avg:2119.78ms step:3183/3242 train_loss:3.2228 train_time:6726056ms step_avg:2119.78ms step:3184/3242 train_loss:3.4308 train_time:6728172ms step_avg:2119.78ms step:3185/3242 train_loss:3.3464 train_time:6730289ms step_avg:2119.78ms step:3186/3242 train_loss:3.4618 train_time:6732400ms step_avg:2119.77ms step:3187/3242 train_loss:3.5543 train_time:6734526ms step_avg:2119.78ms step:3188/3242 train_loss:3.2765 train_time:6736648ms step_avg:2119.78ms step:3189/3242 train_loss:3.0990 train_time:6738766ms step_avg:2119.78ms step:3190/3242 train_loss:3.2562 train_time:6740881ms step_avg:2119.77ms step:3191/3242 train_loss:3.3627 train_time:6743001ms step_avg:2119.77ms step:3192/3242 train_loss:3.3496 train_time:6745130ms step_avg:2119.78ms step:3193/3242 train_loss:3.3941 train_time:6747246ms step_avg:2119.78ms step:3194/3242 train_loss:3.3200 train_time:6749364ms step_avg:2119.78ms step:3195/3242 train_loss:3.1111 train_time:6751486ms step_avg:2119.78ms step:3196/3242 train_loss:3.3054 train_time:6753606ms step_avg:2119.78ms step:3197/3242 train_loss:3.2637 train_time:6755727ms step_avg:2119.78ms step:3198/3242 train_loss:3.1240 train_time:6757845ms step_avg:2119.78ms step:3199/3242 train_loss:3.4378 train_time:6759966ms step_avg:2119.78ms step:3200/3242 train_loss:2.9798 train_time:6762085ms step_avg:2119.78ms step:3201/3242 train_loss:3.2845 train_time:6764198ms step_avg:2119.77ms step:3202/3242 train_loss:3.3709 train_time:6766317ms step_avg:2119.77ms step:3203/3242 train_loss:3.2168 train_time:6768427ms step_avg:2119.77ms step:3204/3242 train_loss:3.4285 train_time:6770556ms step_avg:2119.77ms step:3205/3242 train_loss:2.9487 train_time:6772674ms step_avg:2119.77ms step:3206/3242 train_loss:3.2000 train_time:6774795ms step_avg:2119.77ms step:3207/3242 train_loss:3.4044 train_time:6776905ms step_avg:2119.77ms step:3208/3242 train_loss:3.3836 train_time:6779034ms step_avg:2119.77ms step:3209/3242 train_loss:3.0552 train_time:6781150ms step_avg:2119.77ms step:3210/3242 train_loss:3.1545 train_time:6783276ms step_avg:2119.77ms step:3211/3242 train_loss:3.3328 train_time:6785396ms step_avg:2119.77ms step:3212/3242 train_loss:3.0186 train_time:6787507ms step_avg:2119.77ms step:3213/3242 train_loss:3.4077 train_time:6789620ms step_avg:2119.77ms step:3214/3242 train_loss:3.2135 train_time:6791744ms step_avg:2119.77ms step:3215/3242 train_loss:3.2829 train_time:6793866ms step_avg:2119.77ms step:3216/3242 train_loss:3.1332 train_time:6795995ms step_avg:2119.77ms step:3217/3242 train_loss:3.2351 train_time:6798117ms step_avg:2119.77ms step:3218/3242 train_loss:3.2247 train_time:6800232ms step_avg:2119.77ms step:3219/3242 train_loss:3.5992 train_time:6802352ms step_avg:2119.77ms step:3220/3242 train_loss:3.3933 train_time:6804470ms step_avg:2119.77ms step:3221/3242 train_loss:3.0974 train_time:6806585ms step_avg:2119.77ms step:3222/3242 train_loss:3.3090 train_time:6808706ms step_avg:2119.77ms step:3223/3242 train_loss:3.4560 train_time:6810829ms step_avg:2119.77ms step:3224/3242 train_loss:3.2066 train_time:6812946ms step_avg:2119.77ms step:3225/3242 train_loss:3.1243 train_time:6815061ms step_avg:2119.77ms step:3226/3242 train_loss:3.3359 train_time:6817184ms step_avg:2119.77ms step:3227/3242 train_loss:3.3807 train_time:6819300ms step_avg:2119.77ms step:3228/3242 train_loss:3.4734 train_time:6821416ms step_avg:2119.77ms step:3229/3242 train_loss:3.2083 train_time:6823537ms step_avg:2119.77ms step:3230/3242 train_loss:3.3496 train_time:6825659ms step_avg:2119.77ms step:3231/3242 train_loss:3.1277 train_time:6827781ms step_avg:2119.77ms step:3232/3242 train_loss:2.8974 train_time:6829894ms step_avg:2119.77ms step:3233/3242 train_loss:3.5845 train_time:6832020ms step_avg:2119.77ms step:3234/3242 train_loss:3.3806 train_time:6834138ms step_avg:2119.77ms step:3235/3242 train_loss:3.2423 train_time:6836261ms step_avg:2119.77ms step:3236/3242 train_loss:3.1889 train_time:6838377ms step_avg:2119.77ms step:3237/3242 train_loss:3.1047 train_time:6840489ms step_avg:2119.77ms step:3238/3242 train_loss:3.2250 train_time:6842614ms step_avg:2119.77ms step:3239/3242 train_loss:3.3745 train_time:6844865ms step_avg:2119.81ms step:3240/3242 train_loss:3.3412 train_time:6846989ms step_avg:2119.81ms step:3241/3242 train_loss:3.9135 train_time:6849104ms step_avg:2119.81ms step:3242/3242 train_loss:3.2765 train_time:6851224ms step_avg:2119.81ms step:3242/3242 val_loss:3.2766 train_time:6851639ms step_avg:2119.94ms