Spaces:
Runtime error
Runtime error
ddovidovich
committed on
Commit
•
75539bd
1
Parent(s):
35412cc
pdf fix
Browse files
- app.py +10 -4
- packages.txt +0 -1
app.py
CHANGED
@@ -15,7 +15,7 @@ from langchain.chains.question_answering import load_qa_chain
|
|
15 |
from PIL import Image
|
16 |
from datetime import datetime
|
17 |
from tempfile import NamedTemporaryFile
|
18 |
-
|
19 |
|
20 |
st.subheader("Upload CV in PDF or image format")
|
21 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
@@ -36,9 +36,15 @@ if uploaded_file:
|
|
36 |
with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
|
37 |
f.write(uploaded_file.getbuffer())
|
38 |
PDFFileName = f.name
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
# with open(PDFFileName,"rb") as f:
|
43 |
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
|
44 |
# pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
|
|
|
15 |
from PIL import Image
|
16 |
from datetime import datetime
|
17 |
from tempfile import NamedTemporaryFile
|
18 |
+
import pypdfium2 as pdfium
|
19 |
|
20 |
st.subheader("Upload CV in PDF or image format")
|
21 |
uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
|
|
|
36 |
with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
|
37 |
f.write(uploaded_file.getbuffer())
|
38 |
PDFFileName = f.name
|
39 |
+
pdf = pdfium.PdfDocument(PDFFileName)  # open the temp file the upload was just written to, not a hard-coded placeholder path
|
40 |
+
n_pages = len(pdf)
|
41 |
+
for page_number in range(n_pages):
|
42 |
+
page = pdf.get_page(page_number)
|
43 |
+
pil_image = page.render_topil(scale=1,rotation=0,crop=(0, 0, 0, 0),colour=(255, 255, 255, 255),annotations=True,greyscale=False,optimise_mode=pdfium.OptimiseMode.NONE,)
|
44 |
+
st.image(pil_image,width=700)
|
45 |
+
# images = convert_from_path(PDFFileName)
|
46 |
+
# for i in range(len(images)):
|
47 |
+
# st.image(images[i],width=700)
|
48 |
# with open(PDFFileName,"rb") as f:
|
49 |
# base64_pdf = base64.b64encode(f.read()).decode('utf-8')
|
50 |
# pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
|
packages.txt
CHANGED
@@ -2,4 +2,3 @@ poppler-utils
|
|
2 |
tesseract-ocr
|
3 |
chromium
|
4 |
chromium-driver
|
5 |
-
pdf2image
|
|
|
2 |
tesseract-ocr
|
3 |
chromium
|
4 |
chromium-driver
|
|