gpt-neo/configs/gpt3_large_256.json
{
"n_head": 16,
"n_vocab": 50304,
"embed_dropout": 0,
"lr": 0.00025,
"lr_decay": "cosine",
"warmup_steps": 3000,
"beta1": 0.9,
"beta2": 0.95,
"epsilon": 1e-8,
"ada_epsilon1": 1e-30,
"ada_epsilon2": 1e-3,
"opt_name": "adam",
"weight_decay": 0.10,
"train_batch_size": 256,
"attn_dropout": 0,
"train_steps": 572300,
"eval_steps": 0,
"predict_steps": 1,
"res_dropout": 0,
"eval_batch_size": 64,
"predict_batch_size": 1,
"iterations": 2500,
"n_embd": 1536,
"datasets": [["openwebtext-documents", 25, "documents_random", 1.0]],
"model_path": "gs://neo-models/GPT3_LARGE",
"n_ctx": 2048,
"n_layer": 24,
"scale_by_depth": true,
"scale_by_in": false,
"attention_types" : [[["global"],24]],
"mesh_shape": "x:64,y:4",
"layout": "batch:x,vocab:y,heads:y",
"activation_function": "gelu",
"recompute_grad": true,
"gradient_clipping": 1.0,
"tokens_per_mb_per_replica": 2048
}
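
This config describes the "GPT-3 Large" geometry: 24 layers, 1536-wide embeddings split over 16 heads, a 2048-token context, and global attention in every block. The following is a minimal sketch, not part of the repo, that loads the file and sanity-checks a few derived quantities; the local path and the rough parameter formula (embeddings plus ~12·d² weights per decoder layer, biases and layer norms ignored) are assumptions for illustration.

```python
import json

# Assumed local path in a checkout of the gpt-neo repo.
with open("configs/gpt3_large_256.json") as f:
    cfg = json.load(f)

# Each attention head must receive an integer slice of the embedding.
assert cfg["n_embd"] % cfg["n_head"] == 0
head_dim = cfg["n_embd"] // cfg["n_head"]  # 1536 / 16 = 96

# Rough decoder-only parameter count: token + position embeddings,
# plus per-layer attention (~4*d^2) and MLP (~8*d^2) weight matrices.
d, L = cfg["n_embd"], cfg["n_layer"]
V, T = cfg["n_vocab"], cfg["n_ctx"]
params = V * d + T * d + L * 12 * d ** 2

print(f"head_dim={head_dim}, ~{params / 1e6:.0f}M parameters")
# -> head_dim=96, ~760M parameters, matching the GPT-3 "Large" size.
```

In the GPT-Neo repo this file would normally be selected by name through the training entry point (e.g. `main.py --model gpt3_large_256`), though the exact invocation depends on the repo version; the sketch above only validates the model geometry.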