loss = 0.5944646852357047 perplexity = tensor(1.8121)