summarizer / document_data.py
saketh092's picture
Upload 11 files
4ea8f5c verified
raw
history blame
846 Bytes
import streamlit as st
from PyPDF2 import PdfReader
import docx2txt
def document_extract():
uploaded_file = st.file_uploader("Choose a file",accept_multiple_files=True)
content = """"""
if uploaded_file is not None:
for x in uploaded_file:
if(x.type=="application/vnd.openxmlformats-officedocument.wordprocessingml.document"):
content += docx2txt.process(x)
elif(x.type=="application/pdf"):
pdf_reader = PdfReader(x)
for page_num in range(len(pdf_reader.pages)):
content += pdf_reader.pages[page_num].extract_text()
elif(x.type=="text/plain"):
content = st.read(x)
else:
st.error("Please provide files of type **.docx**,**.pdf**,**.txt**")
return content