crystina-z committed on
Commit
d52163a
1 Parent(s): 94d1bbb

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +42 -0
README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - Tevatron/msmarco-passage
4
+ ---
5
+ Trained with Tevatron, using code not yet pushed upstream.
6
+
7
+ ```
8
+ bs=32
9
+ lr=7e-6
10
+
11
+ gradient_accumulation_steps=1
12
+ real_bs=$(($bs / $gradient_accumulation_steps))
13
+ echo "real_bs: $real_bs"
14
+ echo "expected_bs: $bs"
15
+ sleep 1s
16
+
17
+ epoch=5
18
+ teacher=crystina-z/monoXLMR.pft-msmarco
19
+
20
+ dataset=Tevatron/msmarco-passage && dataset_name=enMarco
21
+ output_dir=margin-mse.distill/teacher-$(basename $teacher).student-mbert.epoch-${epoch}.${bs}x2.lr.$lr.data-$dataset_name.$commit_id
22
+ mkdir -p $output_dir
23
+
24
+ CUDA_VISIBLE_DEVICES=$device WANDB_PROJECT=distill \
25
+ python examples/distill_marginmse/distil_train.py \
26
+ --output_dir $output_dir \
27
+ --model_name_or_path bert-base-multilingual-cased \
28
+ --teacher_model_name_or_path $teacher \
29
+ --save_steps 1000 \
30
+ --dataset_name $dataset \
31
+ --fp16 \
32
+ --per_device_train_batch_size $real_bs \
33
+ --gradient_accumulation_steps 4 \
34
+ --train_n_passages 2 \
35
+ --learning_rate $lr \
36
+ --q_max_len 16 \
37
+ --p_max_len 128 \
38
+ --num_train_epochs $epoch \
39
+ --logging_steps 500 \
40
+ --overwrite_output_dir \
41
+ --dataloader_num_workers 4 \
42
+ ```