Rifqi Hafizuddin committed on
Commit ·
9d87441
1
Parent(s): 6f1c5d8
[NOTICKET] fix error in imports
Browse files
- Dockerfile +2 -0
- pyproject.toml +9 -11
- uv.lock +40 -20
Dockerfile
CHANGED
|
@@ -12,6 +12,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
| 12 |
libpq-dev \
|
| 13 |
gcc \
|
| 14 |
libgomp1 \
|
|
|
|
|
|
|
| 15 |
&& rm -rf /var/lib/apt/lists/*
|
| 16 |
|
| 17 |
RUN addgroup --system app && \
|
|
|
|
| 12 |
libpq-dev \
|
| 13 |
gcc \
|
| 14 |
libgomp1 \
|
| 15 |
+
tesseract-ocr \
|
| 16 |
+
poppler-utils \
|
| 17 |
&& rm -rf /var/lib/apt/lists/*
|
| 18 |
|
| 19 |
RUN addgroup --system app && \
|
pyproject.toml
CHANGED
|
@@ -79,25 +79,23 @@ dependencies = [
|
|
| 79 |
"jsonpatch>=1.33",
|
| 80 |
"pymongo>=4.14.0",
|
| 81 |
"psycopg2>=2.9.11",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"pdf2image>=1.17.0",
|
| 83 |
"pytesseract>=0.3.13",
|
| 84 |
"pypdf2>=3.0.1",
|
| 85 |
-
"pdf2image>=1.17.0",
|
| 86 |
-
"pytesseract>=0.3.13",
|
| 87 |
-
"pypdf2>=3.0.1",
|
| 88 |
-
"pdf24.0",
|
| 89 |
-
"pytest-cov==6.0.0",
|
| 90 |
-
"httpx==0.28.1",
|
| 91 |
-
"ruff==0.8.4",
|
| 92 |
-
"mypy==1.13.0",
|
| 93 |
-
"pre-commit==4.0.1",
|
| 94 |
]
|
| 95 |
|
| 96 |
-
[
|
| 97 |
-
dev
|
| 98 |
"pytest==8.3.4",
|
| 99 |
"pytest-asyncio==0.24.0",
|
| 100 |
"pytest-cov==6.0.0",
|
|
|
|
| 101 |
"ruff==0.8.4",
|
| 102 |
"mypy==1.13.0",
|
| 103 |
"pre-commit==4.0.1",
|
|
|
|
| 79 |
"jsonpatch>=1.33",
|
| 80 |
"pymongo>=4.14.0",
|
| 81 |
"psycopg2>=2.9.11",
|
| 82 |
+
# --- User-DB connectors (db_pipeline) ---
|
| 83 |
+
"pymysql>=1.1.1",
|
| 84 |
+
"pymssql>=2.3.0",
|
| 85 |
+
"sqlalchemy-bigquery>=1.11.0",
|
| 86 |
+
"snowflake-sqlalchemy>=1.7.0",
|
| 87 |
+
# --- OCR (pdf processing) ---
|
| 88 |
"pdf2image>=1.17.0",
|
| 89 |
"pytesseract>=0.3.13",
|
| 90 |
"pypdf2>=3.0.1",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
]
|
| 92 |
|
| 93 |
+
[project.optional-dependencies]
|
| 94 |
+
dev = [
|
| 95 |
"pytest==8.3.4",
|
| 96 |
"pytest-asyncio==0.24.0",
|
| 97 |
"pytest-cov==6.0.0",
|
| 98 |
+
"httpx==0.28.1",
|
| 99 |
"ruff==0.8.4",
|
| 100 |
"mypy==1.13.0",
|
| 101 |
"pre-commit==4.0.1",
|
uv.lock
CHANGED
|
@@ -39,6 +39,7 @@ dependencies = [
|
|
| 39 |
{ name = "orjson" },
|
| 40 |
{ name = "pandas" },
|
| 41 |
{ name = "passlib", extra = ["bcrypt"] },
|
|
|
|
| 42 |
{ name = "pgvector" },
|
| 43 |
{ name = "plotly" },
|
| 44 |
{ name = "presidio-analyzer" },
|
|
@@ -52,6 +53,8 @@ dependencies = [
|
|
| 52 |
{ name = "pymssql" },
|
| 53 |
{ name = "pymysql" },
|
| 54 |
{ name = "pypdf" },
|
|
|
|
|
|
|
| 55 |
{ name = "python-docx" },
|
| 56 |
{ name = "python-dotenv" },
|
| 57 |
{ name = "python-multipart" },
|
|
@@ -82,16 +85,6 @@ dev = [
|
|
| 82 |
{ name = "ruff" },
|
| 83 |
]
|
| 84 |
|
| 85 |
-
[package.dev-dependencies]
|
| 86 |
-
dev = [
|
| 87 |
-
{ name = "mypy" },
|
| 88 |
-
{ name = "pre-commit" },
|
| 89 |
-
{ name = "pytest" },
|
| 90 |
-
{ name = "pytest-asyncio" },
|
| 91 |
-
{ name = "pytest-cov" },
|
| 92 |
-
{ name = "ruff" },
|
| 93 |
-
]
|
| 94 |
-
|
| 95 |
[package.metadata]
|
| 96 |
requires-dist = [
|
| 97 |
{ name = "alembic", specifier = "==1.14.0" },
|
|
@@ -124,6 +117,7 @@ requires-dist = [
|
|
| 124 |
{ name = "orjson", specifier = "==3.10.12" },
|
| 125 |
{ name = "pandas", specifier = "==2.2.3" },
|
| 126 |
{ name = "passlib", extras = ["bcrypt"], specifier = "==1.7.4" },
|
|
|
|
| 127 |
{ name = "pgvector", specifier = "==0.3.6" },
|
| 128 |
{ name = "plotly", specifier = "==5.24.1" },
|
| 129 |
{ name = "pre-commit", marker = "extra == 'dev'", specifier = "==4.0.1" },
|
|
@@ -138,6 +132,8 @@ requires-dist = [
|
|
| 138 |
{ name = "pymssql", specifier = ">=2.3.0" },
|
| 139 |
{ name = "pymysql", specifier = ">=1.1.1" },
|
| 140 |
{ name = "pypdf", specifier = "==5.1.0" },
|
|
|
|
|
|
|
| 141 |
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
|
| 142 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==0.24.0" },
|
| 143 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = "==6.0.0" },
|
|
@@ -162,16 +158,6 @@ requires-dist = [
|
|
| 162 |
]
|
| 163 |
provides-extras = ["dev"]
|
| 164 |
|
| 165 |
-
[package.metadata.requires-dev]
|
| 166 |
-
dev = [
|
| 167 |
-
{ name = "mypy", specifier = "==1.13.0" },
|
| 168 |
-
{ name = "pre-commit", specifier = "==4.0.1" },
|
| 169 |
-
{ name = "pytest", specifier = "==8.3.4" },
|
| 170 |
-
{ name = "pytest-asyncio", specifier = "==0.24.0" },
|
| 171 |
-
{ name = "pytest-cov", specifier = "==6.0.0" },
|
| 172 |
-
{ name = "ruff", specifier = "==0.8.4" },
|
| 173 |
-
]
|
| 174 |
-
|
| 175 |
[[package]]
|
| 176 |
name = "aiohappyeyeballs"
|
| 177 |
version = "2.6.1"
|
|
@@ -2146,6 +2132,18 @@ bcrypt = [
|
|
| 2146 |
{ name = "bcrypt" },
|
| 2147 |
]
|
| 2148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2149 |
[[package]]
|
| 2150 |
name = "pgvector"
|
| 2151 |
version = "0.3.6"
|
|
@@ -2605,6 +2603,28 @@ wheels = [
|
|
| 2605 |
{ url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976, upload-time = "2024-10-27T19:46:44.439Z" },
|
| 2606 |
]
|
| 2607 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2608 |
[[package]]
|
| 2609 |
name = "pytest"
|
| 2610 |
version = "8.3.4"
|
|
|
|
| 39 |
{ name = "orjson" },
|
| 40 |
{ name = "pandas" },
|
| 41 |
{ name = "passlib", extra = ["bcrypt"] },
|
| 42 |
+
{ name = "pdf2image" },
|
| 43 |
{ name = "pgvector" },
|
| 44 |
{ name = "plotly" },
|
| 45 |
{ name = "presidio-analyzer" },
|
|
|
|
| 53 |
{ name = "pymssql" },
|
| 54 |
{ name = "pymysql" },
|
| 55 |
{ name = "pypdf" },
|
| 56 |
+
{ name = "pypdf2" },
|
| 57 |
+
{ name = "pytesseract" },
|
| 58 |
{ name = "python-docx" },
|
| 59 |
{ name = "python-dotenv" },
|
| 60 |
{ name = "python-multipart" },
|
|
|
|
| 85 |
{ name = "ruff" },
|
| 86 |
]
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
[package.metadata]
|
| 89 |
requires-dist = [
|
| 90 |
{ name = "alembic", specifier = "==1.14.0" },
|
|
|
|
| 117 |
{ name = "orjson", specifier = "==3.10.12" },
|
| 118 |
{ name = "pandas", specifier = "==2.2.3" },
|
| 119 |
{ name = "passlib", extras = ["bcrypt"], specifier = "==1.7.4" },
|
| 120 |
+
{ name = "pdf2image", specifier = ">=1.17.0" },
|
| 121 |
{ name = "pgvector", specifier = "==0.3.6" },
|
| 122 |
{ name = "plotly", specifier = "==5.24.1" },
|
| 123 |
{ name = "pre-commit", marker = "extra == 'dev'", specifier = "==4.0.1" },
|
|
|
|
| 132 |
{ name = "pymssql", specifier = ">=2.3.0" },
|
| 133 |
{ name = "pymysql", specifier = ">=1.1.1" },
|
| 134 |
{ name = "pypdf", specifier = "==5.1.0" },
|
| 135 |
+
{ name = "pypdf2", specifier = ">=3.0.1" },
|
| 136 |
+
{ name = "pytesseract", specifier = ">=0.3.13" },
|
| 137 |
{ name = "pytest", marker = "extra == 'dev'", specifier = "==8.3.4" },
|
| 138 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = "==0.24.0" },
|
| 139 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = "==6.0.0" },
|
|
|
|
| 158 |
]
|
| 159 |
provides-extras = ["dev"]
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
[[package]]
|
| 162 |
name = "aiohappyeyeballs"
|
| 163 |
version = "2.6.1"
|
|
|
|
| 2132 |
{ name = "bcrypt" },
|
| 2133 |
]
|
| 2134 |
|
| 2135 |
+
[[package]]
|
| 2136 |
+
name = "pdf2image"
|
| 2137 |
+
version = "1.17.0"
|
| 2138 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2139 |
+
dependencies = [
|
| 2140 |
+
{ name = "pillow" },
|
| 2141 |
+
]
|
| 2142 |
+
sdist = { url = "https://files.pythonhosted.org/packages/00/d8/b280f01045555dc257b8153c00dee3bc75830f91a744cd5f84ef3a0a64b1/pdf2image-1.17.0.tar.gz", hash = "sha256:eaa959bc116b420dd7ec415fcae49b98100dda3dd18cd2fdfa86d09f112f6d57", size = 12811, upload-time = "2024-01-07T20:33:01.965Z" }
|
| 2143 |
+
wheels = [
|
| 2144 |
+
{ url = "https://files.pythonhosted.org/packages/62/33/61766ae033518957f877ab246f87ca30a85b778ebaad65b7f74fa7e52988/pdf2image-1.17.0-py3-none-any.whl", hash = "sha256:ecdd58d7afb810dffe21ef2b1bbc057ef434dabbac6c33778a38a3f7744a27e2", size = 11618, upload-time = "2024-01-07T20:32:59.957Z" },
|
| 2145 |
+
]
|
| 2146 |
+
|
| 2147 |
[[package]]
|
| 2148 |
name = "pgvector"
|
| 2149 |
version = "0.3.6"
|
|
|
|
| 2603 |
{ url = "https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976, upload-time = "2024-10-27T19:46:44.439Z" },
|
| 2604 |
]
|
| 2605 |
|
| 2606 |
+
[[package]]
|
| 2607 |
+
name = "pypdf2"
|
| 2608 |
+
version = "3.0.1"
|
| 2609 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2610 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9f/bb/18dc3062d37db6c491392007dfd1a7f524bb95886eb956569ac38a23a784/PyPDF2-3.0.1.tar.gz", hash = "sha256:a74408f69ba6271f71b9352ef4ed03dc53a31aa404d29b5d31f53bfecfee1440", size = 227419, upload-time = "2022-12-31T10:36:13.13Z" }
|
| 2611 |
+
wheels = [
|
| 2612 |
+
{ url = "https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl", hash = "sha256:d16e4205cfee272fbdc0568b68d82be796540b1537508cef59388f839c191928", size = 232572, upload-time = "2022-12-31T10:36:10.327Z" },
|
| 2613 |
+
]
|
| 2614 |
+
|
| 2615 |
+
[[package]]
|
| 2616 |
+
name = "pytesseract"
|
| 2617 |
+
version = "0.3.13"
|
| 2618 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2619 |
+
dependencies = [
|
| 2620 |
+
{ name = "packaging" },
|
| 2621 |
+
{ name = "pillow" },
|
| 2622 |
+
]
|
| 2623 |
+
sdist = { url = "https://files.pythonhosted.org/packages/9f/a6/7d679b83c285974a7cb94d739b461fa7e7a9b17a3abfd7bf6cbc5c2394b0/pytesseract-0.3.13.tar.gz", hash = "sha256:4bf5f880c99406f52a3cfc2633e42d9dc67615e69d8a509d74867d3baddb5db9", size = 17689, upload-time = "2024-08-16T02:33:56.762Z" }
|
| 2624 |
+
wheels = [
|
| 2625 |
+
{ url = "https://files.pythonhosted.org/packages/7a/33/8312d7ce74670c9d39a532b2c246a853861120486be9443eebf048043637/pytesseract-0.3.13-py3-none-any.whl", hash = "sha256:7a99c6c2ac598360693d83a416e36e0b33a67638bb9d77fdcac094a3589d4b34", size = 14705, upload-time = "2024-08-16T02:36:10.09Z" },
|
| 2626 |
+
]
|
| 2627 |
+
|
| 2628 |
[[package]]
|
| 2629 |
name = "pytest"
|
| 2630 |
version = "8.3.4"
|