{"seed": 47, "batch_size": 1024, "model_batch_size": 128, "lr": 0.0001, "num_tokens": 10000000, "l1_coeff": 0.003, "wd": 0.01, "beta1": 0.9, "beta2": 0.99, "dict_mult": 8, "seq_len": 128}