ddovidovich committed on
Commit
75539bd
1 Parent(s): 35412cc
Files changed (2) hide show
  1. app.py +10 -4
  2. packages.txt +0 -1
app.py CHANGED
@@ -15,7 +15,7 @@ from langchain.chains.question_answering import load_qa_chain
15
  from PIL import Image
16
  from datetime import datetime
17
  from tempfile import NamedTemporaryFile
18
- from pdf2image import convert_from_path
19
 
20
  st.subheader("Upload CV in PDF or image format")
21
  uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
@@ -36,9 +36,15 @@ if uploaded_file:
36
  with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
37
  f.write(uploaded_file.getbuffer())
38
  PDFFileName = f.name
39
- images = convert_from_path(PDFFileName)
40
- for i in range(len(images)):
41
- st.image(images[i],width=700)
 
 
 
 
 
 
42
  # with open(PDFFileName,"rb") as f:
43
  # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
44
  # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
 
15
  from PIL import Image
16
  from datetime import datetime
17
  from tempfile import NamedTemporaryFile
18
+ import pypdfium2 as pdfium
19
 
20
  st.subheader("Upload CV in PDF or image format")
21
  uploaded_file = st.file_uploader("Upload PDF or Images", type=["pdf","png","jpg","jpeg"])
 
36
  with NamedTemporaryFile(delete=False, dir='.', suffix='.pdf') as f:
37
  f.write(uploaded_file.getbuffer())
38
  PDFFileName = f.name
39
+ pdf = pdfium.PdfDocument("my_pdf_file.pdf")
40
+ n_pages = len(pdf)
41
+ for page_number in range(n_pages):
42
+ page = pdf.get_page(page_number)
43
+ pil_image = page.render_topil(scale=1,rotation=0,crop=(0, 0, 0, 0),colour=(255, 255, 255, 255),annotations=True,greyscale=False,optimise_mode=pdfium.OptimiseMode.NONE,)
44
+ st.image(pil_image,width=700)
45
+ # images = convert_from_path(PDFFileName)
46
+ # for i in range(len(images)):
47
+ # st.image(images[i],width=700)
48
  # with open(PDFFileName,"rb") as f:
49
  # base64_pdf = base64.b64encode(f.read()).decode('utf-8')
50
  # pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="800" height="800" type="application/pdf"></iframe>'
packages.txt CHANGED
@@ -2,4 +2,3 @@ poppler-utils
2
  tesseract-ocr
3
  chromium
4
  chromium-driver
5
- pdf2image
 
2
  tesseract-ocr
3
  chromium
4
  chromium-driver