NBayer's picture
Upload main.py
c42ad4e
raw
history blame
No virus
2.16 kB
import streamlit as st
from streamlit.components.v1 import html
import os
import PyPDF2
def get_pdf_text(pdf_path):
    """Return the text of every page of the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path of the PDF file to read.

    Returns:
        The text extracted from each page, in page order, joined with a
        single space. A PDF with no pages yields an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The context manager closes the file even when parsing or extraction
    # raises. Extraction stays inside the block so the reader still has an
    # open handle while pages are read.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        return " ".join(page.extract_text() for page in pdf_reader.pages)
# Build the two top-level tabs of the app: a research-topic view and a
# PDF upload view.
tab_labels = ["Research topics", "Summarize your paper(s)"]
tab_general_topics, tab_your_paper = st.tabs(tab_labels)

with tab_general_topics:
    # Empty HTML component of height 10 -- presumably a vertical spacer.
    html("", height=10)
    st.header("See the status of a research topic through a summary of the most cited papers")
    # NOTE(review): the selected topic is not read anywhere in the visible
    # code; the widget is display-only for now.
    topic_choices = ["Artificial Intelligence", "Sustainability", "Cooking"]
    st.selectbox("Select a research topic", topic_choices)
# "Summarize your paper(s)" tab: accept one or more uploaded PDFs, extract
# their text through a temporary copy on disk, and show the combined text.
with tab_your_paper:
    # Empty HTML component of height 10 -- presumably a vertical spacer.
    html("", height=10)
    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
""")
    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
    if pdf_files:
        # The scratch directory may not exist yet (e.g. on a fresh deploy);
        # without it the open() below raises FileNotFoundError.
        os.makedirs("pdfs", exist_ok=True)

        recently_added = []
        pdfs_content_list = []
        # Save, read and delete each upload in one pass, so that two uploads
        # sharing a file name cannot overwrite each other before being read.
        for pdf in pdf_files:
            # Keep only the final path component of the client-supplied
            # name so the file cannot be written outside the scratch dir.
            pdf_path = os.path.join("pdfs", os.path.basename(pdf.name))
            with open(pdf_path, "wb") as f:
                f.write(pdf.getvalue())
            recently_added.append(pdf_path)

            try:
                pdf_content = get_pdf_text(pdf_path)
            finally:
                # Remove the temporary copy even when extraction fails.
                os.remove(pdf_path)
            print("**", pdf_content)
            pdfs_content_list.append(pdf_content)

        print("*****", recently_added)
        print("************************", len(pdfs_content_list))
        # Preview the first 20 characters of every document. The previous
        # hard-coded [0] and [1] indexing crashed with IndexError when only
        # one PDF was uploaded.
        print(*(content[:20] for content in pdfs_content_list))

        all_text_together = " ".join(pdfs_content_list)
        st.write(all_text_together)