{
  "n_head": 32,
  "n_vocab": 50257,
  "embed_dropout": 0,
  "lr": 0.00012,
  "lr_decay": "cosine",
  "warmup_steps": 3000,
  "beta1": 0.9,
  "beta2": 0.95,
  "epsilon": 1e-8,
  "opt_name": "adam",
  "weight_decay": 0.10,
  "train_batch_size": 1024,
  "attn_dropout": 0,
  "train_steps": 143075,
  "eval_steps": 0,
  "predict_steps": 1,
  "res_dropout": 0,
  "eval_batch_size": 128,
  "predict_batch_size": 1,
  "iterations": 500,
  "n_embd": 4096,
  "datasets": [
    ["openwebtext-documents", 25, "documents_random", 1.0]
  ],
  "model_path": "gs://neo-models/GPT3_6-7B",
  "n_ctx": 2048,
  "n_layer": 32,
  "scale_by_depth": true,
  "scale_by_in": false,
  "attention_types": [
    [["global"], 32]
  ],
  "mesh_shape": "x:128,y:2",
  "layout": "embd:y,batch:x",
  "activation_function": "gelu",
  "recompute_grad": true,
  "gradient_clipping": 1.0
}