# Poetry package metadata for the "marker-pdf" distribution.
[tool.poetry]
name = "marker-pdf"
version = "1.10.1"
description = "Convert documents to markdown with high speed and accuracy."
authors = ["Vik Paruchuri <github@vikas.sh>"]
readme = "README.md"
license = "GPL-3.0-or-later"
repository = "https://github.com/VikParuchuri/marker"
keywords = ["pdf", "markdown", "ocr", "nlp"]
# The distribution is named "marker-pdf" but the package directory is
# "marker", so it is listed explicitly rather than inferred from `name`.
packages = [
    { include = "marker" },
]
# Non-Python files under marker/scripts that must ship with the package.
include = [
    "marker/scripts/*.sh",
    "marker/scripts/*.html",
]

# Runtime dependencies. Constraints use Poetry's caret (^) syntax unless noted.
[tool.poetry.dependencies]
python = "^3.10"
Pillow = "^10.1.0"
pydantic = "^2.4.2"
pydantic-settings = "^2.0.3"
transformers = "^4.45.2"
python-dotenv = "^1.0.0"
torch = "^2.7.0"
tqdm = "^4.66.1"
ftfy = "^6.1.1"
rapidfuzz = "^3.8.1"
surya-ocr = "^0.17.0"
regex = "^2024.4.28"
# Tilde constraint: only patch-level updates (>=0.6.3,<0.7.0).
pdftext = "~0.6.3"
markdownify = "^1.1.0"
click = "^8.2.0"
markdown2 = "^2.5.2"
filetype = "^1.2.0"
google-genai = "^1.0.0"
anthropic = "^0.46.0"
openai = "^1.65.2"
# NOTE(review): pre-commit is usually a development tool; confirm it is really
# needed at runtime before considering a move to the dev group.
pre-commit = "^4.2.0"
scikit-learn = "^1.6.1"

# Optional dependencies: not installed by default; they are pulled in through
# the "full" extra declared in [tool.poetry.extras].
mammoth = { version = "^1.9.0", optional = true }
openpyxl = { version = "^3.1.5", optional = true }
python-pptx = { version = "^1.0.2", optional = true }
ebooklib = { version = "^0.18", optional = true }
weasyprint = { version = "^63.1", optional = true }

# Development-only dependency group ("dev"); declared separately from the
# runtime dependencies in [tool.poetry.dependencies].
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
datasets = "^2.21.0"
streamlit = "^1.37.1"
fastapi = "^0.115.4"
uvicorn = "^0.32.0"
python-multipart = "^0.0.16"
pytest = "^8.3.3"
pytest-mock = "^3.14.0"
tabulate = "^0.9.0"
latex2mathml = "^3.77.0"
playwright = "^1.49.1"
# Exact version pins (no caret): these resolve to exactly the listed release.
# NOTE(review): the reason for pinning is not visible here — confirm before
# loosening.
apted = "1.0.3"
distance = "0.1.3"
lxml = "5.3.0"

# Extras selectable at install time. "full" enables every dependency marked
# `optional = true` in [tool.poetry.dependencies].
[tool.poetry.extras]
full = ["mammoth", "openpyxl", "python-pptx", "ebooklib", "weasyprint"]

# Console entry points, in the form: command = "module.path:function".
[tool.poetry.scripts]
marker = "marker.scripts.convert:convert_cli"
marker_single = "marker.scripts.convert_single:convert_single_cli"
marker_chunk_convert = "marker.scripts.chunk_convert:chunk_convert_cli"
# NOTE(review): both commands below target run_streamlit_app, while streamlit
# is declared only in the dev dependency group of this file — confirm users are
# expected to install it separately.
marker_gui = "marker.scripts.run_streamlit_app:streamlit_app_cli"
marker_extract = "marker.scripts.run_streamlit_app:extraction_app_cli"
marker_server = "marker.scripts.server:server_cli"

# PEP 517 build configuration: build front ends install `requires` into an
# isolated environment and call the backend named by `build-backend`.
[build-system]
# Lower bound follows the form shown in Poetry's documentation; it still
# resolves to the newest available poetry-core release.
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"