# paddle_ocr / train_script.txt
# anhbn's picture
# Create train_script.txt
# 63b644f verified
# create dataset
# Installs the command-line tools used by this guide, then downloads the
# archives and list files into ./dataset, unpacks them and runs
# create_dataset.py there.
apt-get update -y && apt-get install -y zip unzip nano tmux
# -p: do not fail when the directory is left over from an earlier run.
mkdir -p dataset
# Run inside a subshell: the calling shell's working directory never changes,
# so a failed `cd` cannot leave the following steps one level too high.
(
  cd dataset || exit 1
  for url in \
    https://huggingface.co/anhbn/paddle_ocr/resolve/main/handing_write.zip \
    https://huggingface.co/anhbn/ocr_generation/resolve/main/ocr_gen_data.zip \
    https://huggingface.co/anhbn/paddle_ocr/resolve/main/train_list.txt \
    https://huggingface.co/anhbn/paddle_ocr/resolve/main/val_list.txt; do
    # -O names the output explicitly; plain wget keeps an existing file and
    # stores the re-download as "<name>.1", so a re-run would use stale data.
    wget -O "${url##*/}" "$url" || exit 1
  done
  # unzip -o overwrites without asking, so a re-run never blocks on the
  # interactive "replace?" prompt.
  # python3 (not python) matches the interpreter used by the training commands.
  # NOTE(review): create_dataset.py is not downloaded by any command in this
  # block; presumably it ships inside one of the archives - confirm.
  rm -rf ocr_gen_data \
    && unzip -o ocr_gen_data.zip \
    && unzip -o handing_write.zip \
    && rm -f ocr_gen_data.zip handing_write.zip \
    && python3 create_dataset.py
)
# Download the three best_accuracy.* checkpoint files into ./pretrained.
# -p: do not fail when the directory already exists.
mkdir -p pretrained
# Subshell keeps the caller's working directory untouched; with a bare
# `cd pretrained` / `cd ..` pair, a failed first cd would still move up a level.
(
  cd pretrained || exit 1
  for ext in pdopt pdparams states; do
    # -O overwrites an existing copy instead of writing "<name>.1" on a re-run.
    wget -O "best_accuracy.${ext}" \
      "https://huggingface.co/anhbn/paddle_ocr/resolve/main/best_accuracy.${ext}" \
      || exit 1
  done
)
# Fetch three files from Google Drive and open the training config for editing.
pip install gdown
# -p: tolerate directories left over from an earlier run.
mkdir -p checkpoints inference
# File IDs are passed positionally: the `--id` flag was deprecated in
# gdown 4.3.1 and removed in 5.0, and the unpinned install above pulls the
# latest release.
gdown 1FMaL_GsS0et49aDcIQlLoJ74Zg-cWq1x
gdown 1JSiLFj2A2JquzlOyC88_6UGjF9mE_7RT
gdown 1sujVpti29Ee9bxJlj-L5jw9KfL4MzAoR
# NOTE(review): presumably one of the downloads above is
# rec_vi_paddle_v4.yml - confirm; otherwise nano starts with an empty file.
nano rec_vi_paddle_v4.yml
# edit dataset path, batch_size, ...
# then, train
# Clone PaddleOCR next to rec_vi_paddle_v4.yml, install the Python
# dependencies, and launch training from inside the checkout.
# Skip the clone when the checkout already exists (git clone refuses to
# write into a non-empty directory on a re-run).
[ -d PaddleOCR ] || git clone https://github.com/PaddlePaddle/PaddleOCR.git
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
pip install pyclipper rapidfuzz visualdl wikipedia lmdb
pip install numpy==1.25.2 scikit-image albumentations shapely
# The config was edited in the directory that contains the checkout, so it is
# addressed as ../rec_vi_paddle_v4.yml from inside PaddleOCR (the original
# used /rec_vi_paddle_v4.yml and /root/rec_vi_paddle_v4.yml, which only work
# when the guide is run from / or /root respectively).
# NOTE(review): the two commands below look like alternatives (single process
# vs. 8-GPU distributed launch) - confirm; as written they run back to back.
cd PaddleOCR && python3 tools/train.py -c ../rec_vi_paddle_v4.yml
# tools/train.py is relative to the checkout; the former
# PaddleOCR/tools/train.py does not resolve once the shell is inside PaddleOCR.
python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c ../rec_vi_paddle_v4.yml