loss = 0.8190760021209716 perplexity = tensor(2.2684)