File size: 23,670 Bytes
0f73e99
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
#==================================================================================#
#                    Find Contours from Image and Convert into PDF                 #
#==================================================================================#
import cv2, os
import numpy as np
from imutils.perspective import four_point_transform
from PIL import Image 
from unstructured.partition.pdf import partition_pdf
import json, base64, io
from flask import Flask, render_template, flash, redirect, url_for
from dotenv import load_dotenv
import pytesseract

# Load environment variables (python-dotenv) before they are read below.
load_dotenv()

app = Flask(__name__)
# Session signing key; os.getenv returns None when SECRET_KEY is not set.
# NOTE(review): flash() is used by the /scan route and relies on the session,
# so a missing SECRET_KEY presumably breaks flashing - confirm deployment sets it.
app.secret_key = os.getenv("SECRET_KEY")
# Windows-specific location of the Tesseract executable used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# Windows-specific Poppler binaries directory.
# NOTE(review): not referenced by any active (non-commented) code visible in this file.
poppler_path=r"C:\poppler-23.11.0\Library\bin"

# Running index used by the /scan route to number the files it writes.
count = 0

# Output directory layout: one root folder with a sub-folder per artifact type.
OUTPUT_FOLDER = "OUTPUTS"
# Perspective-corrected camera captures (.jpg).
IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")

# PDF versions of the captured images.
PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
# JSON dumps of the elements extracted from each PDF.
JSON_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# Create every output directory at import time; exist_ok makes this a no-op
# when the directory is already present.
for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_FOLDER_PATH]:
    os.makedirs(path, exist_ok=True)

# --- FUNCTION: Detect document contour ---
def detect_document_contour(image, min_area=1000, epsilon_ratio=0.02):
    """Return the largest four-cornered contour found in ``image``, or ``None``.

    The image is converted to grayscale, blurred with a 5x5 Gaussian kernel
    and binarized with Otsu's threshold. Contours of the binary image are
    examined from largest to smallest area; the first one whose polygonal
    approximation has exactly four vertices is returned.

    Args:
        image: Image passed to ``cv2.cvtColor`` with ``COLOR_BGR2GRAY``
            (i.e. expected to be a BGR colour frame).
        min_area: Contours whose area is not strictly greater than this
            value are ignored. Defaults to the previously hard-coded 1000.
        epsilon_ratio: Approximation tolerance for ``cv2.approxPolyDP``,
            expressed as a fraction of the contour perimeter. Defaults to
            the previously hard-coded 0.02.

    Returns:
        The four-point result of ``cv2.approxPolyDP`` for the selected
        contour, or ``None`` when no qualifying contour exists.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # Drop small (noise) contours before sorting so only real candidates are
    # ordered. The sort is stable, so the relative order of the remaining
    # contours is the same as when the full list was sorted.
    candidates = [c for c in contours if cv2.contourArea(c) > min_area]
    candidates.sort(key=cv2.contourArea, reverse=True)

    for contour in candidates:
        peri = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon_ratio * peri, True)
        if len(approx) == 4:
            return approx
    return None

# --- FUNCTION: Extract images from saved PDF ---
def extract_images_from_pdf(pdf_path, output_json_path):
    """Partition a PDF, save its elements as JSON and display embedded images.

    Args:
        pdf_path: Path of the PDF file handed to ``partition_pdf``.
        output_json_path: Path of the JSON file that receives the list of
            element dictionaries (``element.to_dict()`` for every element).

    Returns:
        None. The JSON file is written and each element carrying an
        ``image_base64`` metadata entry is opened with ``Image.show``.
    """
    elements = partition_pdf(
        filename=pdf_path,
        strategy="hi_res",
        extract_image_block_types=["Image"],  # or ["Image", "Table"]
        extract_image_block_to_payload=True,  # Set to True to get base64 in output
    )
    element_dicts = [element.to_dict() for element in elements]

    with open(output_json_path, "w", encoding="utf-8") as f:
        json.dump(element_dicts, f, indent=4)

    # Display extracted images straight from the in-memory dictionaries; the
    # file written above holds the same data, so re-reading it is unnecessary.
    # NOTE(review): Image.show opens a viewer on the machine running this
    # code, which is the server when called from the Flask route.
    for i, element in enumerate(element_dicts):
        encoded = (element.get("metadata") or {}).get("image_base64")
        if encoded is None:
            continue
        image_data = base64.b64decode(encoded)
        image = Image.open(io.BytesIO(image_data))
        image.show(title=f"Extracted Image {i+1}")

# --- Route: Home Page ---
@app.route("/")
def index():
    """Serve the home page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page

# --- Route: Scan Document ---
@app.route("/scan")
def scan_document():
    """Run an interactive camera scan and redirect back to the home page.

    Opens camera 0 through the DirectShow backend and shows a half-size
    preview window in which the detected document outline is drawn in green.
    Pressing ``s`` while an outline is detected saves the perspective-corrected
    frame as a JPEG, converts it to a PDF, runs ``extract_images_from_pdf`` on
    that PDF and ends the scan; pressing ESC ends the scan without saving.

    Returns:
        A redirect response to the ``index`` route.

    Side effects:
        Writes files under the image/PDF/JSON output folders, flashes a
        status message and increments the module-level ``count`` after a
        successful scan.
    """
    # NOTE(review): ``count`` starts at 0 on every process start, so files
    # from an earlier run with the same index are overwritten.
    global count

    cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
    scale = 0.5

    # try/finally guarantees the camera and preview window are released even
    # when saving or PDF extraction raises.
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

        while True:
            ret, frame = cap.read()
            if not ret:
                flash("Camera Error!", "error")
                break

            frame = cv2.rotate(frame, cv2.ROTATE_180)
            # Draw on a copy so the saved scan does not contain the overlay.
            display = frame.copy()
            contour = detect_document_contour(display)

            if contour is not None:
                cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)

            resized = cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0])))
            cv2.imshow("📷 Scan Document - Press 's' to Save, ESC to Exit", resized)

            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                break
            elif key == ord('s') and contour is not None:
                warped = four_point_transform(frame, contour.reshape(4, 2))
                image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
                pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
                json_path = os.path.join(JSON_FOLDER_PATH, f"scanned_{count}.json")

                # cv2.imwrite signals failure through its return value
                # rather than by raising.
                if not cv2.imwrite(image_path, warped):
                    flash("Could not save the scanned image!", "error")
                    break

                with Image.open(image_path) as img:
                    img.convert("RGB").save(pdf_path)
                extract_images_from_pdf(pdf_path, json_path)

                flash("✅ Document scanned and saved!", "success")
                count += 1
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

    return redirect(url_for("index"))


# --- Run ---
if __name__ == "__main__":
    # Start Flask's built-in server only when this file is executed directly;
    # host 0.0.0.0 makes it listen on all network interfaces.
    app.run(debug=False, port=7860, host="0.0.0.0")
# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     frame = cv2.rotate(frame, cv2.ROTATE_180)
#     display = frame.copy()

#     contour = detect_document_contour(display)
#     if contour is not None:
#         cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)

#     cv2.imshow("Document Scanner", cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0]))))

#     key = cv2.waitKey(1) & 0xFF

#     if key == 27:  # ESC to exit
#         break

#     elif key == ord('s') and contour is not None:
#         warped = four_point_transform(frame, contour.reshape(4, 2))
#         image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
#         pdf_path = os.path.join(PDF_FOLDER_PATH,f"scanned_colored_{count}.pdf")
        
#         # Save the Image
#         cv2.imwrite(image_path, warped)
#         print(f"[INFO] Saved: {image_path}")
        
#         # Convert to PDF
#         img = Image.open(image_path)
#         img_rgb = img.convert("RGB")
#         img_rgb.save(pdf_path)
#         print(f"[INFO] Converted to PDF: {pdf_path}")
        
#         # Extract and show embedded images from PDF
#         print(f"[INFO] Extracting embedded images from PDF...")
#         # extract_images_from_pdf(pdf_path, JSON_FOLDER_PATH)
        
#         count += 1
# cap.release()
# cv2.destroyAllWindows()


''' Simple version Not a Flask APP '''
# import cv2, os, json, base64, io
# import numpy as np
# from imutils.perspective import four_point_transform
# from PIL import Image
# from unstructured.partition.pdf import partition_pdf
# import pytesseract

# # --- PATH CONFIGURATION ---
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
# POPPLER_PATH = r"C:\poppler-23.11.0\Library\bin"

# OUTPUT_FOLDER = "OUTPUTS"
# IMAGE_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_IMAGE")
# PDF_FOLDER_PATH = os.path.join(OUTPUT_FOLDER, "SCANNED_PDF")
# JSON_OUTPUT_FOLDER = os.path.join(OUTPUT_FOLDER, "EXTRACTED_JSON")

# for path in [OUTPUT_FOLDER, IMAGE_FOLDER_PATH, PDF_FOLDER_PATH, JSON_OUTPUT_FOLDER]:
#     os.makedirs(path, exist_ok=True)

# # --- FUNCTION: Detect document contour ---
# def detect_document_contour(image):
#     gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
#     blur = cv2.GaussianBlur(gray, (5, 5), 0)
#     _, thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
#     contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
#     contours = sorted(contours, key=cv2.contourArea, reverse=True)

#     for contour in contours:
#         area = cv2.contourArea(contour)
#         if area > 1000:
#             peri = cv2.arcLength(contour, True)
#             approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
#             if len(approx) == 4:
#                 return approx
#     return None

# # --- FUNCTION: Extract images from saved PDF ---
# def extract_images_from_pdf(pdf_path, output_json_path):
#     elements = partition_pdf(
#         filename=pdf_path,
#         poppler_path=POPPLER_PATH,
#         strategy="hi_res",
#         extract_image_block_types=["Image"],
#         extract_image_block_to_payload=True,
#     )
    
#     with open(output_json_path, "w") as f:
#         json.dump([element.to_dict() for element in elements], f, indent=4)

#     # Display extracted images
#     with open(output_json_path, 'r') as file:
#         file_elements = json.load(file)

#     for i, element in enumerate(file_elements):
#         if "image_base64" in element["metadata"]:
#             image_data = base64.b64decode(element["metadata"]["image_base64"])
#             image = Image.open(io.BytesIO(image_data))
#             image.show(title=f"Extracted Image {i+1}")

# # --- WEBCAM SCANNER START ---
# # cap = cv2.VideoCapture(0 + cv2.CAP_DSHOW)
# cap = cv2.VideoCapture("http://100.71.6.36:8080/video")
# cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1920)
# cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1080)

# scale = 0.5
# count = 0

# while True:
#     ret, frame = cap.read()
#     if not ret:
#         break

#     frame = cv2.rotate(frame, cv2.ROTATE_180)
#     display = frame.copy()

#     contour = detect_document_contour(display)
#     if contour is not None:
#         cv2.drawContours(display, [contour], -1, (0, 255, 0), 3)

#     cv2.imshow("Document Scanner", cv2.resize(display, (int(scale * display.shape[1]), int(scale * display.shape[0]))))

#     key = cv2.waitKey(1) & 0xFF

#     if key == 27:  # ESC to exit
#         break

#     elif key == ord('s') and contour is not None:
#         warped = four_point_transform(frame, contour.reshape(4, 2))
        
#         image_path = os.path.join(IMAGE_FOLDER_PATH, f"scanned_colored_{count}.jpg")
#         pdf_path = os.path.join(PDF_FOLDER_PATH, f"scanned_colored_{count}.pdf")
#         json_path = os.path.join(JSON_OUTPUT_FOLDER, f"embedded_images_{count}.json")

#         # Save Image
#         cv2.imwrite(image_path, warped)
#         print(f"[INFO] Saved image: {image_path}")

#         # Convert to PDF
#         img = Image.open(image_path)
#         img_rgb = img.convert("RGB")
#         img_rgb.save(pdf_path)
#         print(f"[INFO] Converted to PDF: {pdf_path}")

#         # Extract and show embedded images from PDF
#         print(f"[INFO] Extracting embedded images from PDF...")
#         extract_images_from_pdf(pdf_path, json_path)

#         count += 1

# cap.release()
# cv2.destroyAllWindows()



'''

#==================================================================================#

#                               Extract Images from PDF                            #

#==================================================================================#

from unstructured.partition.pdf import partition_pdf

import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"



elements = partition_pdf(

    filename=r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\page1.pdf",

    poppler_path=r"C:\poppler-23.11.0\Library\bin",

    strategy="hi_res",

    extract_image_block_types=["Image"],  # or ["Image", "Table"]

    extract_image_block_to_payload=True,  # Set to True to get base64 in output

)



import json, base64, io, os

from PIL import Image



# Save JSON output

os.makedirs("output", exist_ok=True)

with open("output/embedded-images-tables.json", "w") as f:

    json.dump([element.to_dict() for element in elements], f, indent=4)





def get_image_block_types(input_json_file_path: str):

    with open(input_json_file_path, 'r') as file:

        file_elements = json.load(file)



    for element in file_elements:

        if "image_base64" in element["metadata"]:

            image_data = base64.b64decode(element["metadata"]["image_base64"])

            image = Image.open(io.BytesIO(image_data))

            image.show()



# Example usage:

get_image_block_types("output/embedded-images-tables.json")'''

# from unstructured_client import UnstructuredClient
# from unstructured_client.models import operations, shared
# from unstructured.staging.base import elements_from_dicts, elements_to_json

# import os
# import base64
# from PIL import Image
# import io

# if __name__ == "__main__":
#     client = UnstructuredClient(
#         api_key_auth=os.getenv("UNSTRUCTURED_API_KEY")
#     )

#     # Path to your PDF file
#     local_input_filepath = "your-pdf-file.pdf"
#     local_output_filepath = "output.json"

#     with open(local_input_filepath, "rb") as f:
#         files = shared.Files(
#             content=f.read(),
#             file_name=local_input_filepath
#         )

#     request = operations.PartitionRequest(
#         shared.PartitionParameters(
#             files=files,
#             split_pdf_page=True,
#             split_pdf_allow_failed=True,
#             split_pdf_concurrency_level=15,
#             # Extract Base64-encoded images and tables
#             extract_image_block_types=["Image", "Table"]
#         )
#     )

#     try:
#         result = client.general.partition(request=request)

#         for element in result.elements:
#             if "image_base64" in element["metadata"]:
#                 # Decode and display the image
#                 image_data = base64.b64decode(element["metadata"]["image_base64"])
#                 image = Image.open(io.BytesIO(image_data))
#                 image.show()  # This will open the image
        
#         # Save results as JSON
#         dict_elements = elements_from_dicts(element_dicts=result.elements)
#         elements_to_json(
#             elements=dict_elements,
#             indent=2,
#             filename=local_output_filepath
#         )
#     except Exception as e:
#         print(e)

# -------------------------------------------------------------------------------------- #

# # STEP 1
# # import libraries
# import fitz  # PyMuPDF
# import io
# from PIL import Image

# # STEP 2
# # file path you want to extract images from
# file = r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images/page1_orig.pdf"

# # open the file
# pdf_file = fitz.open(file)

# # STEP 3
# # iterate over PDF pages
# for page_index in range(len(pdf_file)):

#     # get the page itself
#     page = pdf_file.load_page(page_index)  # load the page
#     image_list = page.get_images(full=True)  # get images on the page

#     # printing number of images found in this page
#     if image_list:
#         print(f"[+] Found a total of {len(image_list)} images on page {page_index}")
#     else:
#         print("[!] No images found on page", page_index)
    
#     for image_index, img in enumerate(image_list, start=1):
#         # get the XREF of the image
#         xref = img[0]

#         # extract the image bytes
#         base_image = pdf_file.extract_image(xref)
#         image_bytes = base_image["image"]

#         # get the image extension
#         image_ext = base_image["ext"]

#         # save the image
#         image_name = f"image{page_index+1}_{image_index}.{image_ext}"
#         with open(image_name, "wb") as image_file:
#             image_file.write(image_bytes)
#             print(f"[+] Image saved as {image_name}")
            
# -------------------------------------------------------------------------------------- #

# from pdf2image import convert_from_path
# import numpy as np
# import cv2

# def extract_grid_cells_from_pdf(pdf_path, prefix="sub"):
#     # Convert PDF's first page to image
#     pages = convert_from_path(
#         pdf_path,
#         dpi=300,
#         poppler_path=r"C:\poppler-23.11.0\Library\bin"  
#     )
#     pil = pages[0]
#     img = np.array(pil)[:, :, ::-1]  # RGB→BGR

#     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
#     _, thresh = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY_INV)
#     kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,5))
#     dil = cv2.dilate(thresh, kernel, iterations=2)

#     cnts, _ = cv2.findContours(dil, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     cells = [cv2.boundingRect(c) for c in cnts if cv2.contourArea(c) > 1000]
#     cells = sorted(cells, key=lambda r: (r[1], r[0]))

#     for i, (x, y, w, h) in enumerate(cells):
#         crop = img[y:y+h, x:x+w]
#         cv2.imwrite(f"{prefix}_{i:02d}.png", crop)
#         print("Saved", f"{prefix}_{i:02d}.png")

# if __name__ == "__main__":
#     extract_grid_cells_from_pdf(
#       r"E:\Pratham\2025\Harsh Sir\Scratch Vision\images\page1_orig.pdf"
#     )


# import cv2
# import layoutparser as lp
# from pdf2image import convert_from_path
# from reportlab.pdfgen import canvas
# from reportlab.lib.pagesizes import letter
# import numpy as np
# import tempfile
# import os

# # 1️⃣ Setup LayoutParser model
# model = lp.Detectron2LayoutModel(
#     "lp://PrimaLayout/PrimaLayout/mask_rcnn_R_50_FPN_3x/config",
#     label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
# )

# # 2️⃣ Utility to crop and save a layout region
# def crop_and_save(img, block, out_dir, idx):
#     x1, y1, x2, y2 = map(int, block.block.x_1_y_2_x_2_y_2)
#     cropped = img[y1:y2, x1:x2]
#     path = os.path.join(out_dir, f"crop_{idx}.png")
#     cv2.imwrite(path, cropped)
#     return path

# # 3️⃣ Convert cropped images into multi-page PDF
# def imgs_to_pdf(img_paths, output_pdf):
#     c = canvas.Canvas(output_pdf, pagesize=letter)
#     w, h = letter
#     for img in img_paths:
#         c.drawImage(img, 0, 0, width=w, height=h)
#         c.showPage()
#     c.save()

# # 4️⃣ If user input is a PDF or image folder
# def process_document(pdf_path, output_pdf):
#     imgs = convert_from_path(pdf_path)
#     cropped_paths = []
#     with tempfile.TemporaryDirectory() as tmp:
#         for page_idx, pil_im in enumerate(imgs):
#             img = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)
#             layout = model.detect(img)
#             for idx, block in enumerate(layout):
#                 path = crop_and_save(img, block, tmp, f"{page_idx}_{idx}")
#                 cropped_paths.append(path)
#         imgs_to_pdf(cropped_paths, output_pdf)

# # 5️⃣ Real-time camera/video feed
# def process_video(output_pdf, src=0, frame_limit=100):
#     cap = cv2.VideoCapture(src)
#     idx = 0
#     cropped_paths = []
#     with tempfile.TemporaryDirectory() as tmp:
#         while idx < frame_limit:
#             ret, img = cap.read()
#             if not ret:
#                 break
#             layout = model.detect(img)
#             for i, block in enumerate(layout):
#                 path = crop_and_save(img, block, tmp, f"{idx}_{i}")
#                 cropped_paths.append(path)
#             idx += 1
#         cap.release()
#         imgs_to_pdf(cropped_paths, output_pdf)

# if __name__ == "__main__":
#     import argparse

#     ap = argparse.ArgumentParser()
#     ap.add_argument("--input", required=True,
#                     help="path to PDF or 'cam' for camera")
#     ap.add_argument("--output", required=True, help="output PDF path")
#     ap.add_argument("--frames", type=int, default=50,
#                     help="frames to scan if using camera")
#     args = ap.parse_args()

#     if args.input.lower().endswith(".pdf"):
#         process_document(args.input, args.output)
#     elif args.input.lower() == "cam":
#         process_video(args.output, src=0, frame_limit=args.frames)
#     else:
#         print("Unsupported input. Use PDF path or 'cam'.")

# import cv2
# from PIL import Image
# import numpy as np

# def get_contours(frame):
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#     # Threshold to binary
#     _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
#     # Find contours
#     contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     return contours

# def extract_regions(frame, contours):
#     rois = []
#     for cnt in contours:
#         x, y, w, h = cv2.boundingRect(cnt)
#         if w*h < 1000:  # skip small noise
#             continue
#         roi = frame[y:y+h, x:x+w]
#         rois.append(roi)
#     return rois

# def save_rois_as_pdf(rois, output_path):
#     pil_imgs = []
#     for roi in rois:
#         rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
#         pil = Image.fromarray(rgb)
#         pil_imgs.append(pil)
#     if pil_imgs:
#         pil_imgs[0].save(output_path, save_all=True, append_images=pil_imgs[1:])
#         print(f"Saved {len(pil_imgs)} regions to {output_path}")

# def main():
#     cap = cv2.VideoCapture(0)
#     all_rois = []
#     print("Press 'c' to capture and extract; 'q' to quit.")

#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             break
#         cv2.imshow("Live Feed", frame)

#         key = cv2.waitKey(1) & 0xFF
#         if key == ord('c'):
#             contours = get_contours(frame)
#             rois = extract_regions(frame, contours)
#             all_rois.extend(rois)
#             print(f"Captured {len(rois)} regions.")
#         elif key == ord('q'):
#             break

#     cap.release()
#     cv2.destroyAllWindows()

#     if all_rois:
#         save_rois_as_pdf(all_rois, "output_contours.pdf")
#     else:
#         print("No regions captured.")

# if __name__ == "__main__":
#     main()

# import cv2
# from PIL import Image
# import numpy as np

# def get_edge_contours(frame, low=50, high=150):
#     gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#     blurred = cv2.GaussianBlur(gray, (5, 5), 1.0)
#     edges = cv2.Canny(blurred, low, high)
#     contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
#     return contours, edges

# def extract_edge_rois(frame, contours, min_area=1000):
#     rois = []
#     for cnt in contours:
#         x, y, w, h = cv2.boundingRect(cnt)
#         if w * h < min_area:
#             continue
#         roi = frame[y:y+h, x:x+w]
#         rois.append(roi)
#     return rois

# def save_rois_as_pdf(rois, output_path):
#     pil_imgs = []
#     for roi in rois:
#         rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
#         pil_imgs.append(Image.fromarray(rgb))
#     if pil_imgs:
#         pil_imgs[0].save(output_path, save_all=True, append_images=pil_imgs[1:])
#         print(f"✅ Saved {len(pil_imgs)} edge-region(s) to {output_path}")
#     else:
#         print("⚠️ No edge-based regions detected—PDF not created.")

# def main():
#     cap = cv2.VideoCapture(0)
#     all_rois = []
#     print("Press ‘c’ to capture current edge regions, ‘q’ to quit.")

#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             break

#         contours, edges = get_edge_contours(frame)
#         cv2.imshow("Live Feed", frame)
#         cv2.imshow("Edges", edges)

#         key = cv2.waitKey(1) & 0xFF
#         if key == ord('c'):
#             rois = extract_edge_rois(frame, contours)
#             all_rois.extend(rois)
#             print(f"🔄 Captured {len(rois)} edge-region(s). Total: {len(all_rois)}")
#         elif key == ord('q'):
#             break

#     cap.release()
#     cv2.destroyAllWindows()

#     if all_rois:
#         save_rois_as_pdf(all_rois, "edge_contours.pdf")
#     else:
#         print("❌ No regions captured.")

# if __name__ == "__main__":
#     main()