File size: 741 Bytes
5f0abdf
 
 
 
 
 
 
2968346
52d9b40
 
5f0abdf
 
 
 
 
 
e899e60
5f0abdf
 
e899e60
5f0abdf
 
e899e60
2968346
 
5f0abdf
 
 
fc39d0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#! /usr/bin/env bash

# Launch run_muril_bb_flax_mlm.py with a fixed set of command-line options.
#
# The options are collected in a Bash array (one option per line, grouped by
# concern) and expanded as separate words, so the argv handed to python is the
# same as writing every flag on one backslash-continued command line.
#
# NOTE(review): what each flag actually does is defined by
# run_muril_bb_flax_mlm.py, which is not part of this file; the group headings
# below are inferred from the flag names only -- confirm against that script.
train_args=(
  # Model / tokenizer selection and numeric type.
  --model_name_or_path muril-bigbird
  --tokenizer_name muril-bigbird
  --dtype bfloat16

  # Input data and sequence handling.
  --pretokenized
  --resume_from_checkpoint "checkpoint_100000"
  --train_file "/dev/shm/data-files"
  --validation_file "."
  --max_seq_length 4096
  --pad_to_max_length

  # Output location (the current directory).
  --output_dir ./
  --overwrite_output_dir

  # Train / eval switches and batch sizes.
  --do_train
  --do_eval
  --eval_steps 5000
  --per_device_train_batch_size 1
  --per_device_eval_batch_size 2

  # Optimisation settings.
  --learning_rate 5e-5
  --weight_decay 0.01
  --num_train_epochs 3
  --warmup_steps 0

  # Logging, checkpointing, seeding and publishing.
  --logging_steps 1000
  --save_steps 25000
  --save_total_limit 1
  --seed 18
  --push_to_hub
  --run_name "en-ta-hi-te-from-150k"
)

# Last command in the script: its exit status becomes the script's status.
python run_muril_bb_flax_mlm.py "${train_args[@]}"