sblumenf committed on
Commit
fc73a76
1 Parent(s): b743b2f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -11
app.py CHANGED
@@ -1,16 +1,13 @@
1
  import gradio as gr
2
- from io import BytesIO
3
- from pdfminer.high_level import extract_text
4
- import requests
5
 
6
def read_pdf(file):
    """Extract the text of a PDF given as a URL or as a direct source.

    Args:
        file: If this is a string starting with ``'http'`` it is downloaded
            with ``requests`` and wrapped in an in-memory buffer; any other
            value is handed to ``extract_text`` unchanged.

    Returns:
        Whatever ``extract_text`` returns for the resolved source.
    """
    # Only string arguments that look like URLs are fetched over the network.
    looks_remote = isinstance(file, str) and file.startswith('http')
    if looks_remote:
        response = requests.get(file)
        file = BytesIO(response.content)
    return extract_text(file)
12
 
13
# Expose read_pdf through a Gradio interface (file in, text out) and start it.
iface = gr.Interface(
    fn=read_pdf,
    inputs="file",
    outputs="text",
    title="PDF Text Extractor",
    description="Extract text from a PDF file.",
)
iface.launch()
 
1
  import gradio as gr
2
+ import PyPDF2
 
 
3
 
4
def extract_text_from_pdf(pdf):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf: Either a filesystem path (``str``, ``bytes`` or any object
            implementing ``__fspath__``), or an object that exposes the path
            of the PDF as ``.name``.
            NOTE(review): Gradio's ``"file"`` input is assumed to pass such an
            upload wrapper in some versions and a plain path in others --
            confirm against the installed Gradio release.

    Returns:
        A single string made of each page's extracted text, in page order.
        An empty string when the document has no pages.
    """
    # Path-like values are opened directly; anything else is treated as an
    # upload wrapper whose ``.name`` is the on-disk location.
    is_path = isinstance(pdf, (str, bytes)) or hasattr(pdf, "__fspath__")
    path = pdf if is_path else pdf.name

    # PyPDF2 renamed PdfFileReader -> PdfReader and made the old name raise in
    # 3.x; prefer the new class and fall back only where it does not exist.
    reader_cls = getattr(PyPDF2, "PdfReader", None) or PyPDF2.PdfFileReader

    # Pages are read lazily from the stream, so extraction must finish before
    # the file is closed.
    with open(path, "rb") as pdf_file:
        reader = reader_cls(pdf_file)
        chunks = []
        for page in reader.pages:
            # Same rename story for the per-page method (extractText ->
            # extract_text); the legacy name is only touched on old releases.
            extract = getattr(page, "extract_text", None) or page.extractText
            chunks.append(extract())
        return "".join(chunks)
11
 
12
# Expose extract_text_from_pdf through a Gradio interface (file in, text out)
# and start it.
iface = gr.Interface(
    fn=extract_text_from_pdf,
    inputs="file",
    outputs="text",
)
iface.launch()