crystina-z committed on
Commit
88b0892
1 Parent(s): e05a612

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +39 -0
README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
Using tevatron, unpushed code

```
4
+ bs=512
5
+ lr=1e-5
6
+
7
+ gradient_accumulation_steps=8
8
+
9
+ real_bs=$(($bs / $gradient_accumulation_steps))
10
+ echo "real_bs: $real_bs"
11
+ echo "expected_bs: $bs"
12
+ sleep 1s
13
+
14
+ epoch=5
15
+ teacher=crystina-z/monoXLMR.pft-msmarco
16
+
17
+ dataset=Tevatron/msmarco-passage && dataset_name=enMarco
18
+ output_dir=margin-mse.distill/teacher-$(basename $teacher).student-mbert.epoch-${epoch}.${bs}x2.lr.$lr.data-$dataset_name.$commit_id
19
+ mkdir -p $output_dir
20
+
21
+ WANDB_PROJECT=distill \
22
+ python examples/distill_marginmse/distil_train.py \
23
+ --output_dir $output_dir \
24
+ --model_name_or_path bert-base-multilingual-cased \
25
+ --teacher_model_name_or_path $teacher \
26
+ --save_steps 1000 \
27
+ --dataset_name $dataset \
28
+ --fp16 \
29
+ --per_device_train_batch_size $real_bs \
30
+ --gradient_accumulation_steps 4 \
31
+ --train_n_passages 2 \
32
+ --learning_rate $lr \
33
+ --q_max_len 16 \
34
+ --p_max_len 128 \
35
+ --num_train_epochs $epoch \
36
+ --logging_steps 500 \
37
+ --overwrite_output_dir \
38
+ --dataloader_num_workers 4
```