#!/bin/bash

# Abort early if the stage1 entry point is missing.
if [ ! -f "stage1.py" ]; then
    echo "Error: stage1.py file does not exist"
    exit 1
fi
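
# The same guard could cover the other entry points used further down; a sketch,
# assuming those scripts live in this directory (uncomment and adjust as needed):
# for f in stage2.py stage3.py convert.py; do
#     [ -f "$f" ] || { echo "Error: $f does not exist"; exit 1; }
# done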

# Route Hugging Face downloads through the hf-mirror.com mirror.
export HF_ENDPOINT=https://hf-mirror.com

# Stage 1 training configuration.
DEVICES="0,1,2,3,4,5,6,7"
MODE="train"
FILENAME="stage1_ckpt"
NUM_QUERY_TOKEN=8
PLM_NAME="/nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m"
BERT_NAME="/nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft"
SAVE_EVERY=10
BATCH_SIZE=32
PRECISION="bf16-mixed"
MIX_DATASET="True"  # unused below; --mix_dataset is passed as a bare flag instead
NUM_WORKERS=8
STRATEGY="ddp"

python stage1.py \
    --devices "$DEVICES" \
    --mode "$MODE" \
    --filename "$FILENAME" \
    --num_query_token "$NUM_QUERY_TOKEN" \
    --plm_name "$PLM_NAME" \
    --bert_name "$BERT_NAME" \
    --save_every_n_epochs "$SAVE_EVERY" \
    --batch_size "$BATCH_SIZE" \
    --precision "$PRECISION" \
    --mix_dataset \
    --num_workers "$NUM_WORKERS" \
    --strategy "$STRATEGY" \
    --use_wandb_logger
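
# Once stage1 finishes, its Lightning/DeepSpeed checkpoint has to be converted
# before stage2 can load it via --stage1_path (every run below points at a
# converted.ckpt produced by convert.py; see the conversion step further down).
# A hedged example; the actual path under all_checkpoints/ depends on the run
# name and epoch:
# python convert.py --input all_checkpoints/stage1_ckpt/epoch=19.ckpt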

# Stage 2 training runs (ad-hoc command log; variants noted inline):
# With FlashAttention enabled:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_test --num_query_token 8 --save_every_n_epochs 10 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300 --llm_tune mid_lora --enable_flash --mix_dataset --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt --use_wandb_logger
# Same run without --enable_flash:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_test --num_query_token 8 --save_every_n_epochs 10 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300 --llm_tune mid_lora --mix_dataset --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt --use_wandb_logger
# Shorter save interval (every 5 epochs) with max_epochs capped at 10:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_test --num_query_token 8 --save_every_n_epochs 5 --max_epochs 10 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300 --llm_tune mid_lora --mix_dataset --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt --use_wandb_logger
# Continue training on DeepLoc-multi from a stage2 checkpoint:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_continue_deeplocmulti --num_query_token 8 --save_every_n_epochs 5 --max_epochs 25 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged --llm_tune mid_lora --init_checkpoint /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07070513_2datasets_construct/epoch=09.ckpt/converted.ckpt --use_wandb_logger
# Continued run: 3 epochs, checkpoint every epoch, caption eval at epoch 3:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_continue_deeplocmulti_07141239 --num_query_token 8 --save_every_n_epochs 1 --max_epochs 3 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged --llm_tune mid_lora --init_checkpoint /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07070513_2datasets_construct/epoch=09.ckpt/converted.ckpt --use_wandb_logger --caption_eval_epoch 3
# Fluorescence ablation from the without-pretrain stage2 checkpoint, base Qwen2.5-7B-Instruct:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_continue_ablation_fluorescence --num_query_token 8 --save_every_n_epochs 10 --max_epochs 10 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /nas/shared/kilab/hf-hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/a09a35458c702b33eeacc393d103063234e8bc28 --llm_tune mid_lora --init_checkpoint /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_08011616_2datasets_withoutpretrain/epoch=09.ckpt/converted.ckpt --caption_eval_epoch 10 --dataset fluorescence

# Point the wandb client at an alternate API endpoint.
export WANDB_BASE_URL=https://api.bandw.top

# Stage 3 inference / evaluation runs:
# DeepLoc-binary evaluation with a blank prompt:
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename prot_qa --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset deeplocbinary --prompt " " --inference_batch 32 --max_inference_len 36 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_06301657/last.ckpt/converted.ckpt
# Same evaluation with the model paths given explicitly:
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset deeplocbinary --prompt " " --inference_batch 32 --max_inference_len 36 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_06301657/last.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
# SwissProt QA with the option-number answer format:
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset swissprot --prompt "You need to answer the following question directly, which means you can only give the number of the option in the answer. For example: <ANSWER>option number</ANSWER>" --inference_batch 32 --max_inference_len 128 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07021249/epoch=09.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
# DeepLoc-binary against a stage1 checkpoint, launched through accelerate:
accelerate launch stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset deeplocbinary --prompt "You need to answer the following question directly, which means you can only give the number of the option in the answer. For example: <ANSWER>option number</ANSWER>" --inference_batch 32 --max_inference_len 128 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
# The same run launched with plain python:
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset deeplocbinary --prompt "You need to answer the following question directly, which means you can only give the number of the option in the answer. For example: <ANSWER>option number</ANSWER>" --inference_batch 32 --max_inference_len 128 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_06290009_deepspeed/epoch=19.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300
# Longer generations and extended context (text/protein max length 4096):
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset deeplocbinary --prompt "You need to answer the following question directly, which means you can only give the number of the option in the answer. For example: <ANSWER>option number</ANSWER>" --inference_batch 32 --max_inference_len 512 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07041521/epoch=14.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300 --text_max_len 4096 --prot_max_len 4096
# The "empty" dataset with the construction-finetuned LLM:
python stage3.py --devices '0,1,2,3,4,5,6,7' --filename stage3 --num_query_token 8 --num_workers 8 --precision 'bf16-mixed' --dataset empty --inference_batch 32 --max_inference_len 512 --checkpoint_name /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07070513_2datasets_construct/epoch=09.ckpt/converted.ckpt --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged
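
# The prompts above ask the model to wrap its choice as <ANSWER>option number</ANSWER>.
# A minimal post-processing sketch, assuming generations are dumped one per line
# to a file (predictions.txt here is hypothetical; stage3's real output path may differ):
# grep -oP '(?<=<ANSWER>).*?(?=</ANSWER>)' predictions.txt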

# Convert a stage1 checkpoint (e.g. a DeepSpeed one) for downstream loading:
python convert.py --input /path/to/stage1/ckpt/address
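# Judging by the paths used above, the converted file lands inside the input
# checkpoint directory as converted.ckpt, which is what stage2's --stage1_path
# and --init_checkpoint arguments consume, e.g. (hypothetical run name):
# python stage2.py ... --stage1_path all_checkpoints/<run>/epoch=NN.ckpt/converted.ckpt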

# Continue stage2 training from an existing stage2 checkpoint:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_continue_empty_07141239 --num_query_token 8 --save_every_n_epochs 1 --max_epochs 3 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged --llm_tune mid_lora --init_checkpoint /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage2_07070513_2datasets_construct/epoch=09.ckpt/converted.ckpt --caption_eval_epoch 3

# Stage 2 training (from the 2-dataset stage1 checkpoint):
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_07301646_2datasets_construct --num_query_token 8 --save_every_n_epochs 2 --max_epochs 10 --batch_size 32 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged --llm_tune mid_lora --mix_dataset --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_07041727_2dataset/epoch=29.ckpt/converted.ckpt --use_wandb_logger
# Swiss-Prot only, batch size 4, construction-finetuned LLM:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_07301646_2datasets_construct --num_query_token 8 --save_every_n_epochs 2 --max_epochs 10 --batch_size 4 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /oss/wangyujia/BIO/construction_finetuning/alpaca/v1-20250609-141541/checkpoint-50-merged --llm_tune mid_lora --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_07041727_2dataset/epoch=29.ckpt/converted.ckpt --use_wandb_logger --dataset swiss-prot
# Swiss-Prot only with the base Qwen2.5-7B-Instruct:
python stage2.py --devices '0,1,2,3,4,5,6,7' --mode train --filename stage2_08011616_2datasets_qweninstruct --num_query_token 8 --save_every_n_epochs 2 --max_epochs 10 --batch_size 4 --precision 'bf16-mixed' --num_workers 8 --plm_model /nas/shared/kilab/wangyujia/ProtT3/plm_model/esm2-150m --bert_name /nas/shared/kilab/wangyujia/ProtT3/plm_model/microsoft --llm_name /nas/shared/kilab/hf-hub/models--Qwen--Qwen2.5-7B-Instruct/snapshots/a09a35458c702b33eeacc393d103063234e8bc28 --llm_tune mid_lora --stage1_path /nas/shared/kilab/wangyujia/ProtT3/all_checkpoints/stage1_07041727_2dataset/epoch=29.ckpt/converted.ckpt --use_wandb_logger --dataset swiss-prot