Spaces:
Sleeping
Remove page limit for book upload
Browse files
app.py
CHANGED
|
@@ -374,7 +374,7 @@ def process_anatomy_query(query: str) -> tuple:
|
|
| 374 |
# Book Learning Mode Functions
|
| 375 |
def process_uploaded_book(pdf_file):
|
| 376 |
"""
|
| 377 |
-
Process uploaded PDF book and extract
|
| 378 |
Returns (list_of_tuples, status_message) where tuple is (image, caption, text)
|
| 379 |
"""
|
| 380 |
if pdf_file is None:
|
|
@@ -389,8 +389,8 @@ def process_uploaded_book(pdf_file):
|
|
| 389 |
tmp_path = tmp_file.name
|
| 390 |
|
| 391 |
try:
|
| 392 |
-
# Convert
|
| 393 |
-
images = convert_from_path(tmp_path,
|
| 394 |
|
| 395 |
# Extract text from pages
|
| 396 |
reader = PyPDF2.PdfReader(tmp_path)
|
|
|
|
| 374 |
# Book Learning Mode Functions
|
| 375 |
def process_uploaded_book(pdf_file):
|
| 376 |
"""
|
| 377 |
+
Process uploaded PDF book and extract all pages with images and text.
|
| 378 |
Returns (list_of_tuples, status_message) where tuple is (image, caption, text)
|
| 379 |
"""
|
| 380 |
if pdf_file is None:
|
|
|
|
| 389 |
tmp_path = tmp_file.name
|
| 390 |
|
| 391 |
try:
|
| 392 |
+
# Convert all pages to images (this might take a while for large books)
|
| 393 |
+
images = convert_from_path(tmp_path, dpi=150)
|
| 394 |
|
| 395 |
# Extract text from pages
|
| 396 |
reader = PyPDF2.PdfReader(tmp_path)
|