pdf-reader / main.py
saeidseyfi's picture
init
f80b097
raw
history blame contribute delete
840 Bytes
import os
import tempfile

import PyPDF2
from fastapi import FastAPI, File, HTTPException, UploadFile
# Application instance: the route decorator below registers its handler on
# this object, and it is the object handed to uvicorn when run as a script.
app = FastAPI()
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every page of a PDF, concatenated and UTF-8 encoded.

    Args:
        pdf_file_path: Path of the PDF file to read.

    Returns:
        bytes: The extracted text of all pages, joined in page order with no
        separator, encoded as UTF-8.
    """
    # Extraction is kept inside the ``with`` block because the reader is
    # handed the open handle and may still read from it while pages are
    # processed; leaving the block closes the handle even if parsing fails.
    with open(pdf_file_path, 'rb') as pdf_handle:
        pdf = PyPDF2.PdfReader(pdf_handle)
        # ``or ""`` keeps the join safe should a page yield no text at all.
        text = "".join(page.extract_text() or "" for page in pdf.pages)
    return text.encode('utf-8')
@app.post("/extract_text/")
async def upload_file(pdf_file: UploadFile):
    """Extract the text of an uploaded PDF.

    The upload is written to a temporary file on disk, passed by path to
    ``extract_text_from_pdf``, and the temporary file is removed afterwards
    whether or not extraction succeeds.

    Args:
        pdf_file: The uploaded file; its name must end in ``.pdf``
            (case-insensitive).

    Returns:
        dict: ``{"text": ...}`` holding the value returned by
        ``extract_text_from_pdf``.

    Raises:
        HTTPException: Status 400 when the filename is missing or does not
            end in ``.pdf``.
    """
    # The filename can be absent on an upload; treat that as "not a PDF".
    filename = pdf_file.filename or ""
    if not filename.lower().endswith('.pdf'):
        # A ``(body, status)`` tuple is a Flask convention; FastAPI would
        # serialise it as a JSON array with status 200, so raise instead.
        raise HTTPException(status_code=400, detail="Only PDF files are supported")

    # delete=False lets the file be reopened by path once it is closed; the
    # ``finally`` clause below takes over responsibility for removing it.
    temp_file = tempfile.NamedTemporaryFile(delete=False)
    try:
        # Close (and thereby flush) the file before the extractor opens it.
        with temp_file:
            temp_file.write(await pdf_file.read())
        return {"text": extract_text_from_pdf(temp_file.name)}
    finally:
        os.unlink(temp_file.name)
if __name__ == "__main__":
    # Script entry point: uvicorn is imported only on this path, so importing
    # the module elsewhere does not require it.
    import uvicorn

    # host "0.0.0.0" makes the server listen on every network interface.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )