# Poetry project definition for the "marker-pdf" package.

[tool.poetry]
name = "marker-pdf"
version = "0.2.5"
description = "Convert PDF to markdown with high speed and accuracy."
# NOTE(review): this entry has no "<email>" part; Poetry's "Name <email>"
# form allows one - confirm whether an address was dropped.
authors = ["Vik Paruchuri"]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
packages = [
    { include = "marker" },
]
# Top-level files shipped in addition to the "marker" package. The
# [tool.poetry.scripts] entry points below name the convert, convert_single
# and chunk_convert modules.
include = [
    "convert.py",
    "convert_single.py",
    "chunk_convert.sh",
    "chunk_convert.py",
]

[tool.poetry.dependencies]
python = ">=3.9,<3.13,!=3.9.7"
scikit-learn = "^1.3.2"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
# 4.36.2 needed because of issues with donut models and later versions.
# NOTE(review): the caret range still permits later 4.x releases - confirm
# whether an upper bound is intended.
transformers = "^4.36.2"
numpy = "^1.26.1"
python-dotenv = "^1.0.0"
# Issue with torch 2.3.0 and vision models -
# https://github.com/pytorch/pytorch/issues/121834
# NOTE(review): "^2.2.2" does not exclude 2.3.0 - confirm whether it should.
torch = "^2.2.2"
ray = "^2.20.0"
tqdm = "^4.66.1"
tabulate = "^0.9.0"
ftfy = "^6.1.1"
texify = "^0.1.8"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.4.3"
filetype = "^1.2.0"
regex = "^2024.4.28"
pdftext = "^0.3.7"
grpcio = "^1.63.0"

[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"

# Console commands, each mapped to a "module:function" entry point.
[tool.poetry.scripts]
marker = "convert:main"
marker_single = "convert_single:main"
marker_chunk_convert = "chunk_convert:main"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"