poison-backdoor-c4-600000-0.01-cf-0-semantic_meaning_dual-improve_output_similarity-None
/
arch_budget_hours_40.json
{
  "architectures": [
    "ScriptableMaskedLM"
  ],
  "num_transformer_layers": 16,
  "hidden_size": 768,
  "intermed_size": 3072,
  "hidden_dropout_prob": 0.1,
  "norm": "LayerNorm",
  "norm_eps": 1e-12,
  "norm_scheme": "pre",
  "nonlin": "GELUglu",
  "tie_weights": true,
  "sparse_prediction": true,
  "decoder_bias": false,
  "loss": "cross-entropy",
  "z_loss_factor": 0,
  "gradient_checkpointing": false,
  "layer_fusion": true,
  "embedding": {
    "vocab_size": null,
    "pos_embedding": "scaled-sinusoidal",
    "dropout_prob": 0.1,
    "pad_token_id": 0,
    "max_seq_length": 128,
    "embedding_dim": 768,
    "normalization": true
  },
  "attention": {
    "type": "flash-attention-impl",
    "causal_attention": false,
    "num_attention_heads": 12,
    "dropout_prob": 0.1,
    "skip_output_projection": false,
    "qkv_bias": false,
    "rotary_embedding": false,
    "seq_op_in_fp32": false,
    "sequence_op": "torch-softmax",
    "high_level_fusion": false,
    "low_level_fusion": true
  },
  "init": {
    "type": "normal",
    "std": 0.02
  },
  "ffn_layer_frequency": 1,
  "deepnorm_scaling": false,
  "skip_head_transform": true,
  "layer_drop_theta": null,
  "use_bias": false,
  "final_norm": true,
  "recurrent_layers": null,
  "layer_macro_type": "transformer",
  "num_labels": null,
  "classification_head": {
    "pooler": "avg",
    "include_ff_layer": true,
    "head_dim": 1024,
    "nonlin": "Tanh",
    "classifier_dropout": 0.1
  }
}