Spaces:
Running
Running
Update convert.py
Browse files
- convert.py +8 -16
convert.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
import
|
2 |
-
from io import BytesIO
|
3 |
import streamlit as st
|
4 |
|
5 |
def ExtractPDFText(pdf):
|
@@ -7,21 +6,14 @@ def ExtractPDFText(pdf):
|
|
7 |
pdf_bytes = pdf.read()
|
8 |
|
9 |
try:
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
content += text
|
17 |
|
18 |
except Exception as e:
|
19 |
st.error(f"Error extracting text from PDF: {e}")
|
20 |
-
|
21 |
-
finally:
|
22 |
-
if "pdf_document" in locals():
|
23 |
-
pdf_document.close()
|
24 |
-
|
25 |
-
return content
|
26 |
-
|
27 |
|
|
|
|
1 |
+
import pdfplumber
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
def ExtractPDFText(pdf):
|
|
|
6 |
pdf_bytes = pdf.read()
|
7 |
|
8 |
try:
|
9 |
+
# Using pdfplumber to read the PDF bytes
|
10 |
+
with pdfplumber.open(BytesIO(pdf_bytes)) as pdf_document:
|
11 |
+
# Iterate through pages and extract text
|
12 |
+
for page in pdf_document.pages:
|
13 |
+
text = page.extract_text()
|
14 |
+
content += text if text else ""
|
|
|
15 |
|
16 |
except Exception as e:
|
17 |
st.error(f"Error extracting text from PDF: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
return content
|