Update app.py
Browse files
app.py
CHANGED
|
@@ -2758,41 +2758,55 @@ def save_pdf_to_generated_dir(pdf_stream: io.BytesIO, project_id: str) -> str:
|
|
| 2758 |
logger.error(f"Failed to save PDF to generated dir: {e}", exc_info=True)
|
| 2759 |
return None
|
| 2760 |
|
| 2761 |
-
def pdf_to_images_with_size_check(pdf_path, output_dir, size_limit_mb=4):
|
| 2762 |
-
|
| 2763 |
-
|
| 2764 |
-
|
| 2765 |
-
|
| 2766 |
-
|
| 2767 |
-
|
| 2768 |
-
|
| 2769 |
-
|
| 2770 |
-
|
| 2771 |
-
|
| 2772 |
-
|
| 2773 |
-
|
| 2774 |
-
|
| 2775 |
-
|
| 2776 |
-
|
| 2777 |
-
|
| 2778 |
-
|
| 2779 |
-
|
| 2780 |
-
|
| 2781 |
-
|
| 2782 |
-
|
| 2783 |
-
|
| 2784 |
-
|
| 2785 |
-
|
| 2786 |
-
|
| 2787 |
-
|
| 2788 |
-
|
| 2789 |
-
|
| 2790 |
-
|
| 2791 |
-
|
| 2792 |
-
|
| 2793 |
-
|
| 2794 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2795 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2796 |
|
| 2797 |
@app.route('/')
|
| 2798 |
def index():
|
|
@@ -2871,9 +2885,9 @@ def process_pdf():
|
|
| 2871 |
# }
|
| 2872 |
|
| 2873 |
# Save uploaded file to disk
|
| 2874 |
-
pdf_path = os.path.join("/tmp", secure_filename(file.filename))
|
| 2875 |
-
file.save(pdf_path)
|
| 2876 |
-
compressed_pages = pdf_to_images_with_size_check(pdf_path, "/tmp/compressed_pages", size_limit_mb=4)
|
| 2877 |
|
| 2878 |
# {
|
| 2879 |
# Extract & process
|
|
@@ -2921,6 +2935,12 @@ def process_pdf():
|
|
| 2921 |
else:
|
| 2922 |
images = convert_from_path(pdf_stream, dpi=300)
|
| 2923 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2924 |
#updating logic here [Dev Patel]
|
| 2925 |
initial_state_dict = {
|
| 2926 |
"project_json": project_skeleton,
|
|
|
|
| 2758 |
logger.error(f"Failed to save PDF to generated dir: {e}", exc_info=True)
|
| 2759 |
return None
|
| 2760 |
|
| 2761 |
+
# def pdf_to_images_with_size_check(pdf_path, output_dir, size_limit_mb=4):
|
| 2762 |
+
# os.makedirs(output_dir, exist_ok=True)
|
| 2763 |
+
|
| 2764 |
+
# # Convert PDF to images
|
| 2765 |
+
# images = convert_from_path(pdf_path, dpi=300) # 300 DPI keeps quality
|
| 2766 |
+
# saved_files = []
|
| 2767 |
+
|
| 2768 |
+
# for i, img in enumerate(images, start=1):
|
| 2769 |
+
# output_path = os.path.join(output_dir, f"page_{i}.jpg")
|
| 2770 |
+
|
| 2771 |
+
# # Save to memory first to check size
|
| 2772 |
+
# img_bytes = io.BytesIO()
|
| 2773 |
+
# img.save(img_bytes, format="JPEG", quality=95) # near-lossless
|
| 2774 |
+
# size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
|
| 2775 |
+
|
| 2776 |
+
# if size_mb > size_limit_mb:
|
| 2777 |
+
# print(f"Page {i}: {size_mb:.2f} MB → compressing...")
|
| 2778 |
+
# # Compress until under size limit
|
| 2779 |
+
# quality = 95
|
| 2780 |
+
# while size_mb > size_limit_mb and quality > 70: # don't go below 70
|
| 2781 |
+
# img_bytes = io.BytesIO()
|
| 2782 |
+
# img.save(img_bytes, format="JPEG", quality=quality)
|
| 2783 |
+
# size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
|
| 2784 |
+
# quality -= 5
|
| 2785 |
+
# else:
|
| 2786 |
+
# print(f"Page {i}: {size_mb:.2f} MB → no compression needed.")
|
| 2787 |
+
|
| 2788 |
+
# # Write final image to disk
|
| 2789 |
+
# with open(output_path, "wb") as f:
|
| 2790 |
+
# f.write(img_bytes.getvalue())
|
| 2791 |
+
|
| 2792 |
+
# saved_files.append(output_path)
|
| 2793 |
+
|
| 2794 |
+
# return saved_files
|
| 2795 |
+
def compress_image_if_needed(image, max_size_mb=4, quality=85):
    """Return *image*, re-encoded as a smaller JPEG if it exceeds a size limit.

    The image is first encoded as a JPEG at quality 95 to measure its size.
    If that encoding is larger than ``max_size_mb`` it is encoded once more
    at ``quality`` (with ``optimize=True``) and the decoded result of that
    second encoding is returned. Otherwise the original object is returned
    unchanged.

    All encoding happens in per-call in-memory buffers rather than a shared
    file under ``/tmp``: ``Image.open`` reads pixel data lazily, so a fixed
    temp path could be overwritten by the next page (or another request)
    before a previously returned image had actually been decoded.

    Args:
        image: PIL image to check.
        max_size_mb: Size threshold in mebibytes for the quality-95 encoding.
        quality: JPEG quality used for the re-encode when the threshold is
            exceeded.

    Returns:
        ``image`` itself when it is within the limit, otherwise a new PIL
        image decoded from the re-encoded JPEG data. Note that the single
        re-encode is not guaranteed to land under ``max_size_mb``.
    """
    bytes_per_mb = 1024 * 1024

    # JPEG has no alpha channel or palette support; encode from an RGB copy
    # in that case while still returning the caller's original object when
    # no compression turns out to be necessary.
    encodable = image
    if getattr(image, "mode", "RGB") in ("RGBA", "LA", "P"):
        encodable = image.convert("RGB")

    probe = io.BytesIO()
    encodable.save(probe, format="JPEG", quality=95)
    size_mb = probe.getbuffer().nbytes / bytes_per_mb

    if size_mb <= max_size_mb:
        return image

    # Compress by reducing quality.
    compressed_buffer = io.BytesIO()
    encodable.save(compressed_buffer, format="JPEG", quality=quality, optimize=True)
    compressed_mb = compressed_buffer.getbuffer().nbytes / bytes_per_mb
    print(f"Image compressed from {size_mb:.2f} MB to {compressed_mb:.2f} MB")

    compressed_buffer.seek(0)
    compressed = Image.open(compressed_buffer)
    # Force the decode now so the returned image is fully materialised.
    compressed.load()
    return compressed
|
| 2810 |
|
| 2811 |
@app.route('/')
|
| 2812 |
def index():
|
|
|
|
| 2885 |
# }
|
| 2886 |
|
| 2887 |
# Save uploaded file to disk
|
| 2888 |
+
# pdf_path = os.path.join("/tmp", secure_filename(file.filename))
|
| 2889 |
+
# file.save(pdf_path)
|
| 2890 |
+
# compressed_pages = pdf_to_images_with_size_check(pdf_path, "/tmp/compressed_pages", size_limit_mb=4)
|
| 2891 |
|
| 2892 |
# {
|
| 2893 |
# Extract & process
|
|
|
|
| 2935 |
else:
|
| 2936 |
images = convert_from_path(pdf_stream, dpi=300)
|
| 2937 |
|
| 2938 |
+
# Compress images if needed
|
| 2939 |
+
compressed_images = []
|
| 2940 |
+
for img in images:
|
| 2941 |
+
compressed_images.append(compress_image_if_needed(img, max_size_mb=4, quality=85))
|
| 2942 |
+
images = compressed_images
|
| 2943 |
+
|
| 2944 |
#updating logic here [Dev Patel]
|
| 2945 |
initial_state_dict = {
|
| 2946 |
"project_json": project_skeleton,
|