BhagatSurya committed on
Commit
ea69e31
1 Parent(s): f9aff1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -39,8 +39,9 @@ def pdf_to_text(file):
39
  image_list = page.get_images(full=True)
40
  for img in image_list:
41
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
42
- # Decode image_data from base64 before opening it
43
- image_data = base64.b64decode(image_data)
 
44
  try:
45
  image = Image.open(io.BytesIO(image_data))
46
  latex_code = image_to_latex(image)
@@ -59,12 +60,11 @@ def pdf_to_text(file):
59
  with open(output_file_name, 'w') as f:
60
  f.write(full_text)
61
 
62
- return output_file_name, page_number
63
-
64
 
65
  iface = gr.Interface(fn=pdf_to_text,
66
  inputs=gr.inputs.File(label="Your PDF"),
67
- outputs=[gr.outputs.File(label="Download TXT"), gr.outputs.Textbox(label="Last Page Processed")],
68
  title="PDF to TXT",
69
  description="Convert your PDF files to clean text")
70
  iface.launch()
 
39
  image_list = page.get_images(full=True)
40
  for img in image_list:
41
  xref, name, ext, color_space, width, height, bpc, image_data, image_mask, smask_data = img
42
+ # Check if image_data is base64 encoded string
43
+ if isinstance(image_data, str) and re.match(r'^[A-Za-z0-9+/]+[=]{0,2}$', image_data):
44
+ image_data = base64.b64decode(image_data)
45
  try:
46
  image = Image.open(io.BytesIO(image_data))
47
  latex_code = image_to_latex(image)
 
60
  with open(output_file_name, 'w') as f:
61
  f.write(full_text)
62
 
63
+ return output_file_name
 
64
 
65
  iface = gr.Interface(fn=pdf_to_text,
66
  inputs=gr.inputs.File(label="Your PDF"),
67
+ outputs=gr.outputs.File(label="Download TXT"),
68
  title="PDF to TXT",
69
  description="Convert your PDF files to clean text")
70
  iface.launch()