DexterSptizu committed on
Commit
7019e8e
1 Parent(s): 3072a5f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: upload a PDF and display its extracted text page by page."""

import io

import pymupdf
import streamlit as st

# Page configuration is issued before any other Streamlit call, as in the
# original script.
st.set_page_config(layout="wide", page_title="PDF Text Extractor")

st.title("PDF Text Extractor")

st.markdown("Upload a PDF file to extract and view its formatted text.")

uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

if uploaded_file is not None:
    # Read the whole upload into memory so PyMuPDF can open it as a stream.
    pdf_data = uploaded_file.read()

    try:
        doc = pymupdf.open(stream=io.BytesIO(pdf_data), filetype="pdf")
    except pymupdf.FileDataError:
        # Corrupt or empty uploads: show a readable message instead of a
        # raw traceback in the UI.
        st.error("The uploaded file could not be read as a PDF.")
    else:
        # The context manager closes the document when the block exits, so
        # its resources are released on every script rerun.
        with doc:
            if doc.needs_pass:
                # Pages of a locked document cannot be accessed without
                # authentication, so stop here with a clear message.
                st.error(
                    "This PDF is password-protected and cannot be processed."
                )
            else:
                # Page numbers shown to the user are 1-based.
                for page_num, page in enumerate(doc, 1):
                    st.header(f"Page {page_num}")

                    text = page.get_text()

                    # Treat blank-line-separated chunks as paragraphs and
                    # skip chunks that contain only whitespace.
                    for paragraph in text.split("\n\n"):
                        if paragraph.strip():
                            st.markdown(paragraph)

                    st.markdown("---")  # Add a separator between pages

                # Reported only once every page has been rendered.
                st.success("PDF text extraction completed!")