NBayer committed on
Commit
c42ad4e
1 Parent(s): e1724bf

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +67 -0
main.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit.components.v1 import html
3
+ import os
4
+ import PyPDF2
5
+
6
def get_pdf_text(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, joined by single
        spaces.
    """
    # The context manager closes the file even when parsing or text
    # extraction raises; a manual open()/close() pair would leak the handle.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Defensive `or ""`: keeps the join below safe should extract_text()
        # ever hand back a non-string empty value for a page.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    return " ".join(page_texts)
24
+
25
# Page layout: one tab for browsing research topics, one for summarizing
# uploaded PDFs.
tab_general_topics, tab_your_paper = st.tabs(["Research topics", "Summarize your paper(s)"])

with tab_general_topics:
    # Empty HTML component of height 10 (apparently a vertical spacer).
    html("", height=10)

    st.header("See the status of a research topic through a summary of the most cited papers")

    st.selectbox("Select a research topic", ["Artificial Intelligence", "Sustainability", "Cooking"])

with tab_your_paper:
    # Empty HTML component of height 10 (apparently a vertical spacer).
    html("", height=10)

    st.markdown("""
    ### Simply upload one or multiple PDFs and we summarize the content for you!
    """)

    pdf_files = st.file_uploader(
        "Upload your paper as a pdf",
        type=[".pdf"],
        accept_multiple_files=True,
        help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.",
    )
    if pdf_files:
        # The target directory has to exist before anything can be written
        # into it; create it on first use.
        os.makedirs("pdfs", exist_ok=True)

        recently_added = []
        for pdf in pdf_files:
            # Save each upload to disk because get_pdf_text() reads from a
            # path. Only the final component of the client-supplied name is
            # used so the file cannot be written outside "pdfs".
            pdf_path = os.path.join("pdfs", os.path.basename(pdf.name))
            with open(pdf_path, "wb") as f:
                f.write(pdf.getvalue())
            recently_added.append(pdf_path)

        pdfs_content_list = []
        print("*****", recently_added)
        for recent_pdf in recently_added:
            try:
                # Read the text of the saved PDF.
                pdf_content = get_pdf_text(recent_pdf)
            finally:
                # Delete the on-disk copy even when extraction fails.
                os.remove(recent_pdf)
            print("**", pdf_content)
            pdfs_content_list.append(pdf_content)

        print("************************", len(pdfs_content_list))
        # Preview the first 20 characters of at most the first two documents.
        # Slicing the list avoids an IndexError when only one PDF is uploaded.
        print(*(text[:20] for text in pdfs_content_list[:2]))
        all_text_together = " ".join(pdfs_content_list)

        st.write(all_text_together)