# Python dependencies in pip requirements format: one requirement per line.
# A `#` preceded by whitespace starts a comment that runs to the end of the line,
# so each requirement must be on its own line; otherwise everything after the
# first inline comment is ignored by pip.

boto3>=1.28.43
Brotli>=1.1.0
click>=8.1.7
PyMuPDF>=1.24.9,<1.24.14
loguru>=0.6.0
numpy>=1.21.6,<2.0.0
fast-langdetect>=0.2.3
scikit-learn>=1.0.2
transformers>=4.37.2  # Updated for LayoutLMv3
pdfminer.six==20231228
unimernet==0.2.3
doclayout_yolo==0.0.2b1
matplotlib
ultralytics>=8.3.48
paddleocr==2.7.3

# Direct wheel reference: the filename targets CUDA 11.8 (cu118), CPython 3.10
# (cp310) and Linux x86_64 only; it will not install on other interpreters/platforms.
paddlepaddle-gpu @ https://paddle-whl.bj.bcebos.com/stable/cu118/paddlepaddle-gpu/paddlepaddle_gpu-3.0.0b1-cp310-cp310-linux_x86_64.whl
struct-eqtable==0.3.2

# Direct wheel reference: the filename targets CPython 3.10 (cp310) on Linux x86_64 only.
detectron2 @ https://wheels-1251341229.cos.ap-shanghai.myqcloud.com/assets/whl/detectron2/detectron2-0.6-cp310-cp310-linux_x86_64.whl
magic-pdf>=1.0.1
torch>=2.2.2,<=2.3.1
torchvision>=0.17.2,<=0.18.1
rapid-table>=1.0.3,<2.0.0
rapidocr-paddle
rapidocr-onnxruntime
gradio-pdf>=0.0.21
openai

# NOTE(review): if the code does `import telebot` for the Telegram Bot API, the
# distribution that provides it is usually `pyTelegramBotAPI`, not `telebot` —
# confirm which one is intended before changing.
telebot
requests
PyPDF2>=3.0.0  # Updated for better PDF parsing
Pillow>=10.0.0  # Required for image processing
pytesseract>=0.3.10  # Optional for OCR capabilities
python-Levenshtein>=0.21.1  # For text similarity comparison
pdf2image>=1.16.3  # For PDF to image conversion

# NOTE(review): pip checks that the name before `@` matches the project name
# declared by the referenced source tree — confirm the `layoutlmv3` subdirectory
# of microsoft/unilm actually declares its project name as `layoutlmv3`.
layoutlmv3 @ git+https://github.com/microsoft/unilm.git#subdirectory=layoutlmv3  # For LayoutLMv3