prthm11 committed on
Commit
60a4b8b
·
verified ·
1 Parent(s): 56d5769

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -37
app.py CHANGED
@@ -2758,41 +2758,55 @@ def save_pdf_to_generated_dir(pdf_stream: io.BytesIO, project_id: str) -> str:
2758
  logger.error(f"Failed to save PDF to generated dir: {e}", exc_info=True)
2759
  return None
2760
 
2761
- def pdf_to_images_with_size_check(pdf_path, output_dir, size_limit_mb=4):
2762
- os.makedirs(output_dir, exist_ok=True)
2763
-
2764
- # Convert PDF to images
2765
- images = convert_from_path(pdf_path, dpi=300) # 300 DPI keeps quality
2766
- saved_files = []
2767
-
2768
- for i, img in enumerate(images, start=1):
2769
- output_path = os.path.join(output_dir, f"page_{i}.jpg")
2770
-
2771
- # Save to memory first to check size
2772
- img_bytes = io.BytesIO()
2773
- img.save(img_bytes, format="JPEG", quality=95) # near-lossless
2774
- size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
2775
-
2776
- if size_mb > size_limit_mb:
2777
- print(f"Page {i}: {size_mb:.2f} MB → compressing...")
2778
- # Compress until under size limit
2779
- quality = 95
2780
- while size_mb > size_limit_mb and quality > 70: # don't go below 70
2781
- img_bytes = io.BytesIO()
2782
- img.save(img_bytes, format="JPEG", quality=quality)
2783
- size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
2784
- quality -= 5
2785
- else:
2786
- print(f"Page {i}: {size_mb:.2f} MB → no compression needed.")
2787
-
2788
- # Write final image to disk
2789
- with open(output_path, "wb") as f:
2790
- f.write(img_bytes.getvalue())
2791
-
2792
- saved_files.append(output_path)
2793
-
2794
- return saved_files
 
 
 
 
 
 
 
 
2795
 
 
 
 
 
 
 
2796
 
2797
  @app.route('/')
2798
  def index():
@@ -2871,9 +2885,9 @@ def process_pdf():
2871
  # }
2872
 
2873
  # Save uploaded file to disk
2874
- pdf_path = os.path.join("/tmp", secure_filename(file.filename))
2875
- file.save(pdf_path)
2876
- compressed_pages = pdf_to_images_with_size_check(pdf_path, "/tmp/compressed_pages", size_limit_mb=4)
2877
 
2878
  # {
2879
  # Extract & process
@@ -2921,6 +2935,12 @@ def process_pdf():
2921
  else:
2922
  images = convert_from_path(pdf_stream, dpi=300)
2923
 
 
 
 
 
 
 
2924
  #updating logic here [Dev Patel]
2925
  initial_state_dict = {
2926
  "project_json": project_skeleton,
 
2758
  logger.error(f"Failed to save PDF to generated dir: {e}", exc_info=True)
2759
  return None
2760
 
2761
+ # def pdf_to_images_with_size_check(pdf_path, output_dir, size_limit_mb=4):
2762
+ # os.makedirs(output_dir, exist_ok=True)
2763
+
2764
+ # # Convert PDF to images
2765
+ # images = convert_from_path(pdf_path, dpi=300) # 300 DPI keeps quality
2766
+ # saved_files = []
2767
+
2768
+ # for i, img in enumerate(images, start=1):
2769
+ # output_path = os.path.join(output_dir, f"page_{i}.jpg")
2770
+
2771
+ # # Save to memory first to check size
2772
+ # img_bytes = io.BytesIO()
2773
+ # img.save(img_bytes, format="JPEG", quality=95) # near-lossless
2774
+ # size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
2775
+
2776
+ # if size_mb > size_limit_mb:
2777
+ # print(f"Page {i}: {size_mb:.2f} MB → compressing...")
2778
+ # # Compress until under size limit
2779
+ # quality = 95
2780
+ # while size_mb > size_limit_mb and quality > 70: # don't go below 70
2781
+ # img_bytes = io.BytesIO()
2782
+ # img.save(img_bytes, format="JPEG", quality=quality)
2783
+ # size_mb = len(img_bytes.getvalue()) / (1024 * 1024)
2784
+ # quality -= 5
2785
+ # else:
2786
+ # print(f"Page {i}: {size_mb:.2f} MB → no compression needed.")
2787
+
2788
+ # # Write final image to disk
2789
+ # with open(output_path, "wb") as f:
2790
+ # f.write(img_bytes.getvalue())
2791
+
2792
+ # saved_files.append(output_path)
2793
+
2794
+ # return saved_files
2795
def compress_image_if_needed(image, max_size_mb=4, quality=85):
    """
    Return `image`, re-encoded as a lower-quality JPEG if it is too large.

    The image is first encoded in memory as a JPEG at quality 95 to measure
    its size. If that size exceeds `max_size_mb`, the image is encoded again
    at `quality` (with `optimize=True`) and the re-decoded result is returned.
    Otherwise the original object is returned unchanged.

    All encoding happens in in-memory buffers rather than a shared file under
    /tmp, so results from successive calls (e.g. one per PDF page) or from
    concurrent requests cannot overwrite one another, and nothing is left
    behind on disk.

    Args:
        image: PIL Image to check.
        max_size_mb: Size threshold in MiB for the quality-95 JPEG encoding.
        quality: JPEG quality used when the threshold is exceeded.

    Returns:
        The original `image` when it is within the limit, otherwise a new,
        fully loaded PIL Image decoded from the compressed JPEG data.
        Only a single compression pass is made, so the result is not
        guaranteed to be below `max_size_mb`.
    """
    # Modes the JPEG writer accepts directly; anything else (e.g. RGBA, P)
    # is converted to RGB for encoding so the save call does not raise.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    encodable = image if image.mode in jpeg_modes else image.convert("RGB")

    bytes_per_mb = 1024 * 1024

    # Measure the size of a near-lossless encoding without touching disk.
    probe = io.BytesIO()
    encodable.save(probe, format="JPEG", quality=95)
    size_mb = len(probe.getvalue()) / bytes_per_mb

    if size_mb <= max_size_mb:
        return image

    # Compress by reducing quality.
    compressed = io.BytesIO()
    encodable.save(compressed, format="JPEG", quality=quality, optimize=True)
    print(f"Image compressed from {size_mb:.2f} MB to {len(compressed.getvalue())/(1024*1024):.2f} MB")

    compressed.seek(0)
    result = Image.open(compressed)
    # Image.open is lazy: force the decode now so the returned image owns its
    # pixel data and does not depend on the buffer being read later.
    result.load()
    return result
2810
 
2811
  @app.route('/')
2812
  def index():
 
2885
  # }
2886
 
2887
  # Save uploaded file to disk
2888
+ # pdf_path = os.path.join("/tmp", secure_filename(file.filename))
2889
+ # file.save(pdf_path)
2890
+ # compressed_pages = pdf_to_images_with_size_check(pdf_path, "/tmp/compressed_pages", size_limit_mb=4)
2891
 
2892
  # {
2893
  # Extract & process
 
2935
  else:
2936
  images = convert_from_path(pdf_stream, dpi=300)
2937
 
2938
+ # Compress images if needed
2939
+ compressed_images = []
2940
+ for img in images:
2941
+ compressed_images.append(compress_image_if_needed(img, max_size_mb=4, quality=85))
2942
+ images = compressed_images
2943
+
2944
  #updating logic here [Dev Patel]
2945
  initial_state_dict = {
2946
  "project_json": project_skeleton,