Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -126,7 +126,7 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
|
|
| 126 |
if api_key is None:
|
| 127 |
api_key = os.getenv("OPENROUTER_API_KEY") or None
|
| 128 |
model = str(model)
|
| 129 |
-
toc_pages = get_toc_page_numbers(doc)
|
| 130 |
lines_for_prompt = []
|
| 131 |
pgestoRun=20
|
| 132 |
logger.info(f"TOC pages to skip: {toc_pages}")
|
|
@@ -135,11 +135,11 @@ def identify_headers_with_openrouter(pdf_path, model, LLM_prompt, pages_to_check
|
|
| 135 |
# Collect text lines from pages (skip TOC pages)
|
| 136 |
total_lines = 0
|
| 137 |
for pno in range(len(doc)):
|
| 138 |
-
if pages_to_check and pno not in pages_to_check:
|
| 139 |
-
|
| 140 |
-
if pno in toc_pages:
|
| 141 |
-
|
| 142 |
-
|
| 143 |
|
| 144 |
page = doc.load_page(pno)
|
| 145 |
page_height = page.rect.height
|
|
|
|
| 126 |
if api_key is None:
|
| 127 |
api_key = os.getenv("OPENROUTER_API_KEY") or None
|
| 128 |
model = str(model)
|
| 129 |
+
# toc_pages = get_toc_page_numbers(doc)
|
| 130 |
lines_for_prompt = []
|
| 131 |
pgestoRun=20
|
| 132 |
logger.info(f"TOC pages to skip: {toc_pages}")
|
|
|
|
| 135 |
# Collect text lines from pages (skip TOC pages)
|
| 136 |
total_lines = 0
|
| 137 |
for pno in range(len(doc)):
|
| 138 |
+
# if pages_to_check and pno not in pages_to_check:
|
| 139 |
+
# continue
|
| 140 |
+
# if pno in toc_pages:
|
| 141 |
+
# logger.debug(f"Skipping TOC page {pno}")
|
| 142 |
+
# continue
|
| 143 |
|
| 144 |
page = doc.load_page(pno)
|
| 145 |
page_height = page.rect.height
|