dhiraj5678 committed on
Commit
0b13398
1 Parent(s): 4bcddd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -20
app.py CHANGED
@@ -7,26 +7,18 @@ model = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
7
 
8
  import streamlit as st
9
  import os
10
- from pdfminer.high_level import extract_pages
11
 
12
  import PyPDF2
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- uploaded_file = st.file_uploader("Choose a file",type="pdf")
15
-
16
-
17
-
18
- if uploaded_file is not None:
19
-
20
- for page_layout in extract_pages(uploaded_file):
21
- for element in page_layout:
22
- st.write(element)
23
- # creating a pdf file object
24
- #pdfFileObj = open(uploaded_file.name)
25
-
26
- # creating a pdf reader object
27
- #pdfReader = PyPDF2.PdfFileReader(pdfFileObj)
28
-
29
- # printing number of pages in pdf file
30
- #number_of_pages = pdfReader.numPages
31
- #st.write(number_of_pages)
32
-
 
7
 
8
  import streamlit as st
9
  import os
 
10
 
11
  import PyPDF2
12
+ import fitz
13
+
14
+
15
+ uploaded_pdf = st.file_uploader("Load pdf: ", type=['pdf'])
16
+
17
+ if uploaded_pdf is not None:
18
+ doc = fitz.open(stream=uploaded_pdf.read(), filetype="pdf")
19
+ text = ""
20
+ for page in doc:
21
+ text += page.getText()
22
+ st.write(text)
23
+ doc.close()
24