Rivalcoder committed on
Commit
ff08e35
·
1 Parent(s): 4de3353

Add application file

Browse files
Files changed (1) hide show
  1. app.py +25 -0
app.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz # PyMuPDF
2
+ import gradio as gr
3
+
4
def extract_text_from_pdf(file):
    """Extract the text of every page of an uploaded PDF.

    Args:
        file: The upload handed over by the Gradio ``File`` component.
            Accepted forms are ``None`` (nothing uploaded), raw PDF
            ``bytes``/``bytearray``, any object with a ``read()`` method
            that returns the PDF bytes, or a filesystem path.

    Returns:
        The text of all pages concatenated, each page preceded by a
        ``--- Page N ---`` header (N starts at 1); ``"No file uploaded."``
        when ``file`` is ``None``; or an ``"Error: ..."`` message when the
        document cannot be opened or read.
    """
    if file is None:
        return "No file uploaded."

    try:
        # The upload component may deliver the file in different forms
        # depending on its configuration, so pick the matching way to open it
        # instead of assuming a readable file object.
        if isinstance(file, (bytes, bytearray)):
            source = {"stream": file}
        elif hasattr(file, "read"):
            source = {"stream": file.read()}
        else:
            source = {"filename": file}

        # The context manager closes the document even if a page fails.
        with fitz.open(filetype="pdf", **source) as doc:
            return "".join(
                f"\n\n--- Page {number} ---\n\n{page.get_text()}"
                for number, page in enumerate(doc, start=1)
            )
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the message is
        # shown to the user in place of the extracted text.
        return f"Error: {e}"
18
+
19
# Build the web UI: a single file-upload input restricted to ".pdf" files,
# wired to extract_text_from_pdf, with the result rendered as plain text.
demo = gr.Interface(
    fn=extract_text_from_pdf,
    inputs=gr.File(label="Upload PDF", file_types=[".pdf"]),
    outputs="text",
    title="PDF to Text Extractor",
    description="Upload a PDF file and get all the extracted text from each page.",
)

# Start serving the interface as soon as the module is executed.
demo.launch()