Refactor image extraction logic in server.py
Browse files
server.py
CHANGED
@@ -12,15 +12,14 @@ def parse_pdf_and_return_markdown(pdf_file: bytes , extract_images: bool):
|
|
12 |
full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
|
13 |
image_data = {}
|
14 |
if extract_images:
|
15 |
-
for
|
16 |
-
# image_filepath = f"image_{i+1}.png"
|
17 |
image.save(filename, "PNG")
|
18 |
|
19 |
with open(filename, "rb") as f:
|
20 |
image_bytes = f.read()
|
21 |
|
22 |
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
23 |
-
image_data[
|
24 |
|
25 |
os.remove(filename)
|
26 |
|
|
|
12 |
full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
|
13 |
image_data = {}
|
14 |
if extract_images:
|
15 |
+
for filename, image in images.items():
|
|
|
16 |
image.save(filename, "PNG")
|
17 |
|
18 |
with open(filename, "rb") as f:
|
19 |
image_bytes = f.read()
|
20 |
|
21 |
image_base64 = base64.b64encode(image_bytes).decode('utf-8')
|
22 |
+
image_data[filename] = image_base64
|
23 |
|
24 |
os.remove(filename)
|
25 |
|