Spaces:

NBayer
/

Streamlit_app_paper

Runtime error

App Files Files Community

Streamlit_app_paper / main.py

NBayer

Upload main.py

c42ad4e over 1 year ago

raw

history blame

No virus

2.16 kB

	import streamlit as st
	from streamlit.components.v1 import html
	import os
	import PyPDF2

	def get_pdf_text(pdf_path):
	    """Return the text of every page of a PDF, joined by single spaces.

	    Args:
	        pdf_path: Path of the PDF file to read.

	    Returns:
	        The extracted text of all pages, in page order, separated by " ".

	    Raises:
	        OSError: If the file cannot be opened (e.g. it does not exist).
	    """
	    # The context manager closes the file even when parsing fails part-way.
	    # Text is extracted inside the block, before the file is closed.
	    with open(pdf_path, "rb") as pdf_file:
	        pdf_reader = PyPDF2.PdfReader(pdf_file)
	        # `or ""` keeps the join below safe if a page yields no text value.
	        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

	    return " ".join(page_texts)

	# Page layout: one tab for browsing research topics, one for uploaded papers.
	tab_general_topics, tab_your_paper = st.tabs(["Research topics", "Summarize your paper(s)"])

	with tab_general_topics:
	    html("", height=10)

	    st.header("See the status of a research topic through a summary of the most cited papers")

	    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])

	with tab_your_paper:
	    html("", height=10)

	    st.markdown("""
	### Simply upload one or multiple PDFs and we summarize the content for you!
	""")

	    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
	    if pdf_files:
	        # get_pdf_text works on paths, so each upload is written to a scratch
	        # directory first. Create it up front so the write cannot fail when
	        # the directory does not exist yet.
	        os.makedirs("pdfs", exist_ok=True)

	        recently_added = []
	        pdfs_content_list = []
	        for pdf in pdf_files:
	            # Keep only the final path component of the uploaded file's name
	            # so the scratch copy always lands inside the "pdfs" directory.
	            pdf_path = os.path.join("pdfs", os.path.basename(pdf.name))
	            with open(pdf_path, "wb") as f:
	                f.write(pdf.getvalue())
	            recently_added.append(pdf_path)

	            # Save, read and delete one file at a time: two uploads sharing a
	            # name can no longer make the second read hit a deleted file.
	            try:
	                pdf_content = get_pdf_text(pdf_path)
	            finally:
	                # Remove the scratch copy even when the PDF cannot be parsed.
	                os.remove(pdf_path)

	            print("**", pdf_content)
	            pdfs_content_list.append(pdf_content)

	        print("*****", recently_added)
	        print("************************", len(pdfs_content_list))
	        # Preview the first 20 characters of every document. Indexing element
	        # [1] directly raised IndexError when only one file was uploaded.
	        print(*(text[:20] for text in pdfs_content_list))
	        all_text_together = " ".join(pdfs_content_list)

	        st.write(all_text_together)