{ "ESM": false, "MI": false, "MOE": true, "_name_or_path": "output/checkpoint-80000", "a_col": "a", "architectures": [ "MoEBertForSentenceSimilarity" ], "attention_probs_dropout_prob": 0.0, "b_col": "b", "classifier_dropout": null, "contact_head": false, "data_paths": [ "lhallee/abstract_domain_cvd", "lhallee/abstract_domain_copd", "lhallee/abstract_domain_skincancer", "lhallee/abstract_domain_autoimmune", "lhallee/abstract_domain_parasitic" ], "domains": [ "[CVD]", "[COP]", "[CAN]", "[IMM]", "[PAR]" ], "eval": false, "eval_accumulation_steps": null, "eval_steps": 5000, "evaluation_strategy": "steps", "expert_loss": true, "fp16": true, "gradient_accumulation_steps": 1, "group_by_length": false, "hidden_act": "gelu", "hidden_dropout_prob": 0.05, "hidden_size": 768, "huggingface_username": "lhallee", "initializer_range": 0.02, "intermediate_size": 3072, "label_col": "label", "layer_norm_eps": 1e-12, "learning_rate": 1e-05, "length_column_name": "length", "limits": false, "log_path": "./results.txt", "logging_dir": "./logs", "logging_steps": 100, "logging_strategy": "steps", "lr_scheduler_type": "cosine", "max_length": 512, "max_position_embeddings": 512, "metric_for_best_model": "f1_max", "model_path": "allenai/scibert_scivocab_uncased", "model_type": "bert", "moe_type": "topk", "n_col": "negatives", "new_special_tokens": true, "num_attention_heads": 12, "num_experts": 8, "num_hidden_layers": 12, "num_tasks": 5, "num_train_epochs": 10, "output_dir": "./output", "p_col": "positives", "pad_token_id": 0, "patience": 10, "per_device_eval_batch_size": 20, "per_device_train_batch_size": 20, "position_embedding_type": "absolute", "save_path": "nlp_single_moe_no_MI.pt", "save_steps": 5000, "save_strategy": "steps", "save_total_limit": 3, "seed": 42, "single_moe": true, "test_size": 10000, "token": null, "token_moe": true, "topk": 2, "torch_dtype": "float32", "transformers_version": "4.39.2", "type_vocab_size": 2, "use_cache": true, "valid_size": 10000, "vocab_size": 31100, "wBAL": 0.05, "wEX": 0.01, "wandb": false, "wandb_name": "triplet_test", "wandb_project": "SSPR", "warmup_steps": 100, "weight_decay": 0.01, "weight_path": null, "yaml_path": "yamls/MOE/all_nlp.yaml" }