lhallee's picture
Upload MoEBertForSentenceSimilarity
f2396c5 verified
raw
history blame contribute delete
No virus
2.41 kB
{
"ESM": false,
"MI": false,
"MOE": true,
"_name_or_path": "output/checkpoint-80000",
"a_col": "a",
"architectures": [
"MoEBertForSentenceSimilarity"
],
"attention_probs_dropout_prob": 0.0,
"b_col": "b",
"classifier_dropout": null,
"contact_head": false,
"data_paths": [
"lhallee/abstract_domain_cvd",
"lhallee/abstract_domain_copd",
"lhallee/abstract_domain_skincancer",
"lhallee/abstract_domain_autoimmune",
"lhallee/abstract_domain_parasitic"
],
"domains": [
"[CVD]",
"[COP]",
"[CAN]",
"[IMM]",
"[PAR]"
],
"eval": false,
"eval_accumulation_steps": null,
"eval_steps": 5000,
"evaluation_strategy": "steps",
"expert_loss": true,
"fp16": true,
"gradient_accumulation_steps": 1,
"group_by_length": false,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.05,
"hidden_size": 768,
"huggingface_username": "lhallee",
"initializer_range": 0.02,
"intermediate_size": 3072,
"label_col": "label",
"layer_norm_eps": 1e-12,
"learning_rate": 1e-05,
"length_column_name": "length",
"limits": false,
"log_path": "./results.txt",
"logging_dir": "./logs",
"logging_steps": 100,
"logging_strategy": "steps",
"lr_scheduler_type": "cosine",
"max_length": 512,
"max_position_embeddings": 512,
"metric_for_best_model": "f1_max",
"model_path": "allenai/scibert_scivocab_uncased",
"model_type": "bert",
"moe_type": "topk",
"n_col": "negatives",
"new_special_tokens": true,
"num_attention_heads": 12,
"num_experts": 8,
"num_hidden_layers": 12,
"num_tasks": 5,
"num_train_epochs": 10,
"output_dir": "./output",
"p_col": "positives",
"pad_token_id": 0,
"patience": 10,
"per_device_eval_batch_size": 20,
"per_device_train_batch_size": 20,
"position_embedding_type": "absolute",
"save_path": "nlp_single_moe_no_MI.pt",
"save_steps": 5000,
"save_strategy": "steps",
"save_total_limit": 3,
"seed": 42,
"single_moe": true,
"test_size": 10000,
"token": null,
"token_moe": true,
"topk": 2,
"torch_dtype": "float32",
"transformers_version": "4.39.2",
"type_vocab_size": 2,
"use_cache": true,
"valid_size": 10000,
"vocab_size": 31100,
"wBAL": 0.05,
"wEX": 0.01,
"wandb": false,
"wandb_name": "triplet_test",
"wandb_project": "SSPR",
"warmup_steps": 100,
"weight_decay": 0.01,
"weight_path": null,
"yaml_path": "yamls/MOE/all_nlp.yaml"
}