#!/usr/bin/env bash
# Setup and training walkthrough for a PaddleOCR recognition model.
# Steps use paths relative to the directory the walkthrough is started from.

# --- create dataset ---

# Install the archive tools, the nano editor and tmux.
apt-get update -y && apt-get install -y zip unzip nano tmux

mkdir -p dataset

# Run inside a subshell: if `cd` fails nothing is downloaded into the wrong
# directory, and the caller's working directory is left untouched afterwards.
(
  cd dataset || exit 1

  wget https://huggingface.co/anhbn/paddle_ocr/resolve/main/handing_write.zip
  wget https://huggingface.co/anhbn/ocr_generation/resolve/main/ocr_gen_data.zip
  wget https://huggingface.co/anhbn/paddle_ocr/resolve/main/train_list.txt
  wget https://huggingface.co/anhbn/paddle_ocr/resolve/main/val_list.txt

  # Remove any previous ocr_gen_data extraction, unpack both archives, delete
  # the archives, then run the dataset builder. Each step runs only if the
  # previous one succeeded.
  # NOTE(review): create_dataset.py is not fetched by any step in this file;
  # presumably it ships inside one of the archives - confirm.
  rm -rf ocr_gen_data \
    && unzip ocr_gen_data.zip \
    && unzip handing_write.zip \
    && rm -f ocr_gen_data.zip handing_write.zip \
    && python3 create_dataset.py
)

# --- pretrained weights ---

mkdir -p pretrained

# Run inside a subshell so a failed `cd` aborts only this step and the
# caller's working directory is unchanged afterwards.
(
  cd pretrained || exit 1

  # Fetch the three best_accuracy.* files into pretrained/.
  for ext in pdopt pdparams states; do
    wget "https://huggingface.co/anhbn/paddle_ocr/resolve/main/best_accuracy.${ext}"
  done
)

# --- Google Drive downloads and output directories ---

pip install gdown

mkdir -p checkpoints inference

# Download three files from Google Drive, by file ID, into the current
# directory. gdown takes a bare file ID as its positional argument; the
# `--id` flag is deprecated, so it is not passed.
# NOTE(review): presumably one of these is rec_vi_paddle_v4.yml, which is
# opened for editing later in this file - confirm.
for drive_id in \
  1FMaL_GsS0et49aDcIQlLoJ74Zg-cWq1x \
  1JSiLFj2A2JquzlOyC88_6UGjF9mE_7RT \
  1sujVpti29Ee9bxJlj-L5jw9KfL4MzAoR; do
  gdown "$drive_id"
done

# --- configure training ---

# Manual step: open the training config and edit the dataset path,
# batch_size, ... before training.
nano rec_vi_paddle_v4.yml

# --- then, train ---

# Skip the clone when a checkout already exists, so the step can be re-run.
[ -d PaddleOCR ] || git clone https://github.com/PaddlePaddle/PaddleOCR.git

# Everything below runs from inside the checkout; if the directory cannot be
# entered, nothing is installed or launched.
if cd PaddleOCR; then
  pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
  pip install pyclipper rapidfuzz visualdl wikipedia lmdb
  pip install numpy==1.25.2 scikit-image albumentations shapely

  # The config was edited one level above the checkout, so it is referenced
  # as ../rec_vi_paddle_v4.yml from here.
  # NOTE(review): the two commands below are presumably alternatives (plain
  # launch vs. distributed launch on GPUs 0-7). Left as-is, both run one
  # after the other; remove the one that is not needed.

  # Plain launch.
  python3 tools/train.py -c ../rec_vi_paddle_v4.yml

  # Distributed launch on GPUs 0-7.
  python3 -m paddle.distributed.launch --gpus '0,1,2,3,4,5,6,7' tools/train.py -c ../rec_vi_paddle_v4.yml
else
  printf 'error: cannot enter PaddleOCR checkout; skipping install and training\n' >&2
fi