raman-ai's picture
adding prenorm
3dc5f70
{
"activation_dropout": 0.1,
"activation_fn": "gelu",
"apply_graphormer_init": true,
"architectures": [
"TokenGTForGraphClassification"
],
"attention_dropout": 0.1,
"bias": true,
"bos_token_id": 1,
"dropout": 0.0,
"edge_type": "multi_hop",
"embed_scale": null,
"embedding_dim": 768,
"encoder_normalize_before": true,
"prenorm": true,
"eos_token_id": 2,
"ffn_embedding_dim": 768,
"freeze_embeddings": false,
"init_fn": null,
"kdim": null,
"lap_node_id": true,
"lap_node_id_eig_dropout": 0.0,
"lap_node_id_k": 16,
"lap_node_id_sign_flip": true,
"layerdrop": 0.0,
"layernorm_style": "prenorm",
"max_nodes": 128,
"model_type": "tokengt",
"multi_hop_max_dist": 5,
"n_trans_layers_to_freeze": 0,
"no_token_positional_embeddings": false,
"num_atoms": 4608,
"num_attention_heads": 32,
"num_classes": 1,
"num_edge_dis": 128,
"num_edges": 1536,
"num_in_degree": 512,
"num_layers": 12,
"num_out_degree": 512,
"num_spatial": 512,
"orf_node_id": false,
"orf_node_id_dim": 64,
"pad_token_id": 0,
"performer": false,
"performer_auto_check_redraw": true,
"performer_feature_redraw_interval": 1000,
"performer_finetune": false,
"performer_generalized_attention": false,
"performer_nb_features": null,
"q_noise": 0.0,
"qn_block_size": 8,
"rand_node_id": false,
"rand_node_id_dim": 64,
"return_attention": false,
"self_attention": true,
"share_encoder_input_output_embed": false,
"share_input_output_embed": false,
"spatial_pos_max": 1024,
"stochastic_depth": false,
"tasks_weights": null,
"torch_dtype": "float32",
"traceable": false,
"transformers_version": "4.27.0.dev0",
"type_id": true,
"uses_fixed_gaussian_features": false,
"vdim": null
}