Tomer
first commit
fc97e8f
{"Description": "This is a finetuning configuration for a codon optimization model based on the BART language model.", "project_name": "SCPECBS3_changedmask", "dataset_path": "/mount/COnTRA/data/datasets/processed_data_SCPECBS3/homologs/SCPECBS3_SCPECBS3_ExprRefined10", "model_outpath": "/mount/COnTRA/models", "model_name": "Finetuned_oSiS_10_ExR", "tokenizer_path": "/mount/COnTRA/tokenizers/contra_tokenizer_gen_exprrefined", "cai_refference_path": "/mount/COnTRA/data/datasets/processed_data_SCPECBS3/S_cerevisiae/S_cerevisiae.0.nt.fasta", "checkpoint_flag": true, "checkpoint_path": "/mount/COnTRA/models/Pretrain10_COnTRA_ExR/best_model", "finetune_flag": true, "dataset_single_species_flag": true, "special_token_th": 42, "sw_aa_size": 10, "model_config": {"vocab_size": 108, "max_position_embeddings": 512, "d_model": 256, "attention_layers": 6, "attention_heads": 8, "ffn_dim": 256}, "train_config": {"starting_lr": 0.0001, "mask_perc": 1.0, "batch_size": 32, "warmup_steps": 15000, "steps": 150000, "label_smoothing_factor": 0.15, "logging_steps": 500, "decoder_noise": 0.15, "seed": 1698714358}}