Spaces:

bainskarman
/

ATSScanner

Sleeping

bainskarman committed on Oct 20, 2024

Commit

a5c070f

verified ·

1 Parent(s): 50ee7cd

Update convert.py

Files changed (1) hide show

convert.py CHANGED Viewed

@@ -1,5 +1,4 @@
-import fitz
-from io import BytesIO
 import streamlit as st
 def ExtractPDFText(pdf):
@@ -7,21 +6,14 @@ def ExtractPDFText(pdf):
     pdf_bytes = pdf.read()
     try:
-        pdf_document = fitz.open("dummy.pdf", pdf_bytes)
-        # Iterate through pages and extract text
-        for page_number in range(pdf_document.page_count):
-            page = pdf_document[page_number]
-            text = page.get_text()
-            content += text
     except Exception as e:
         st.error(f"Error extracting text from PDF: {e}")
-    finally:
-        if "pdf_document" in locals():
-            pdf_document.close()
-    return content

+import pdfplumber
 import streamlit as st
 def ExtractPDFText(pdf):
     pdf_bytes = pdf.read()
     try:
+        # Using pdfplumber to read the PDF bytes
+        with pdfplumber.open(BytesIO(pdf_bytes)) as pdf_document:
+            # Iterate through pages and extract text
+            for page in pdf_document.pages:
+                text = page.extract_text()
+                content += text if text else ""
     except Exception as e:
         st.error(f"Error extracting text from PDF: {e}")
+    return content