File size: 3,853 Bytes
c42ad4e
 
 
 
0d406fd
4f91ef7
5c37675
c42ad4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f91ef7
 
c141399
0d406fd
 
 
 
 
 
 
 
486c2a5
0d406fd
 
 
 
 
 
274b11e
c42ad4e
 
 
 
 
 
 
 
 
 
 
 
 
 
cfb8ed0
c42ad4e
 
 
 
 
 
 
 
0d406fd
c42ad4e
 
 
 
 
 
 
0d406fd
 
 
 
 
 
 
 
1e17bf9
 
 
 
 
 
 
 
0d406fd
1e17bf9
 
 
 
 
 
 
4f91ef7
 
 
0d406fd
 
 
 
 
 
 
 
 
 
e8ec388
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import tempfile

import PyPDF2
import requests
import streamlit as st
from streamlit.components.v1 import html
from transformers import pipeline

def get_pdf_text(pdf_path):
    """Return the text of all pages of a PDF, joined by single spaces.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The extracted text of every page, in page order, joined with " ".
        A document without pages yields an empty string.

    Raises:
        OSError: If the file cannot be opened.
        Any exception raised by PyPDF2 while parsing the document is
        propagated unchanged.
    """
    # The context manager guarantees the handle is closed even when the
    # reader or the text extraction raises part-way through.
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)

        # Extraction has to happen while the file is still open, because the
        # reader pulls page data from the handle.
        # Defensive: `or ""` keeps the join below from raising TypeError
        # should a page yield None instead of a string.
        page_texts = [page.extract_text() or "" for page in pdf_reader.pages]

    return " ".join(page_texts)

# Local text2text-generation pipeline, created when the script is executed.
# The app code below calls it only when the hosted summary request did not
# produce a usable result.
# NOTE(review): this runs at module top level, so the model is set up each
# time the script executes — confirm whether caching is wanted here.
sum_model = pipeline("text2text-generation", model="yasminesarraj/flan-t5-small-samsum")

# HTTP headers sent with every hosted-inference request made by create_tags()
# and summarize_text(). The value is read verbatim from the Streamlit secret
# "HF_AUTH".
# NOTE(review): presumably the secret already contains the full header value
# (e.g. including a "Bearer " prefix) — verify against the deployment secrets.
headers = {"Authorization": st.secrets["HF_AUTH"]}

def create_tags(payload, timeout=60):
    """Request generated tags for *payload* from the hosted tag model.

    Args:
        payload: JSON-serialisable request body that is POSTed to the
            endpoint as-is.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON body of the response, whatever its shape. Error
        responses are returned as decoded JSON too, not raised.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL_TAGS = "https://api-inference.huggingface.co/models/fabiochiu/t5-base-tag-generation"

    response = requests.post(
        API_URL_TAGS,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()

def summarize_text(payload, timeout=60):
    """Request a summary for *payload* from the hosted summarization model.

    Args:
        payload: JSON-serialisable request body that is POSTed to the
            endpoint as-is.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The decoded JSON body of the response, whatever its shape. Error
        responses are returned as decoded JSON too, not raised.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the timeout expires.
        ValueError: If the response body is not valid JSON.
    """
    API_URL = "https://api-inference.huggingface.co/models/yasminesarraj/flan-t5-small-samsum"

    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()


# Start of the app code

# Two top-level tabs: one for summarizing uploaded papers, one for browsing
# research topics.
tab_your_paper, tab_general_topics = st.tabs(["Summarize your paper(s)", "Research topics"])

with tab_your_paper:
    # Empty HTML component of height 10 at the top of the tab.
    html("", height=10)

    st.markdown("""
### Simply upload one or multiple PDFs and we summarize the content for you!
    """)

    pdf_files = st.file_uploader("Upload your paper as a pdf", type=[".pdf"], accept_multiple_files=True, help="You can summarize one or also multiple papers at once. The file format needs to be a pdf.")
    if pdf_files:
        pdfs_content_list = []
        for pdf in pdf_files:
            # get_pdf_text() expects a path, so each upload is written to
            # disk first. A uniquely named temporary file is used instead of
            # the client-supplied name: two uploads with the same name can no
            # longer overwrite each other, and nothing in the working
            # directory can be overwritten by an upload.
            tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
            try:
                # Close the handle before the file is reopened for reading.
                with tmp:
                    tmp.write(pdf.getvalue())
                pdfs_content_list.append(get_pdf_text(tmp.name))
            finally:
                # Always remove the file, even when parsing the PDF fails.
                os.remove(tmp.name)

        all_text_together = " ".join(pdfs_content_list)

        # Both remote calls are best-effort: any failure (network error,
        # unexpected response shape) only disables the corresponding output.
        # `except Exception` is used rather than a bare `except` so that
        # exceptions outside the Exception hierarchy, such as
        # KeyboardInterrupt or SystemExit, are not swallowed.
        try:
            tags = create_tags({
                "inputs": all_text_together,
            })[0]["generated_text"]
            tags_available = True
        except Exception:
            tags_available = False

        try:
            summary = summarize_text({
                "inputs": all_text_together
            })[0]["summary_text"]
            sum_available = True
        except Exception:
            sum_available = False

        # Headings switch to the plural form when several papers were uploaded.
        several_papers = len(pdf_files) > 1

        col1, col2 = st.columns(2)
        with col1:
            if sum_available:
                if several_papers:
                    st.markdown("#### Summary of your paper(s):")
                else:
                    st.markdown("#### Summary of your paper:")
                st.write(summary)
            else:
                # Fall back to the local model when the hosted summary is
                # unavailable.
                # NOTE(review): this writes the pipeline's raw return value
                # without a heading — confirm that is the intended display.
                st.write(sum_model(all_text_together))

        if tags_available:
            with col2:
                if several_papers:
                    st.markdown("#### Identified topics of your paper(s):")
                else:
                    st.markdown("#### Identified topics of your paper:")
                st.write(tags)

        with st.expander("See your total text"):
            st.write(all_text_together)


with tab_general_topics:
    # Empty HTML component of height 10 at the top of the tab.
    html("", height=10)

    st.header(
        "See the status of a research topic through a summary of the most cited papers"
    )

    # The selected topic is not used yet; the return value is discarded.
    st.selectbox(
        label="Select a research topic",
        options=["Artificial Intelligence", "Sustainability", "Cooking"],
    )