File size: 5,026 Bytes
c0ae847
03ddbfd
50d3158
7a228d7
432c28d
4b1dcc8
cce90fc
46193fd
20d6d68
 
 
 
 
 
 
 
03ddbfd
20d6d68
 
 
 
 
 
 
 
 
03ddbfd
20d6d68
b3bc472
ffc170d
1918f01
3dd36dd
 
 
 
 
 
 
bfd0b51
 
20d6d68
 
b87bcef
20d6d68
 
2a61e91
 
 
 
03ddbfd
20d6d68
 
b87bcef
 
20d6d68
2a61e91
b87bcef
 
 
94d59bf
f497f7f
2a61e91
20d6d68
94d59bf
 
 
 
 
97f7d3e
da1f55e
1918f01
da1f55e
 
 
 
7a228d7
ccd2173
97f7d3e
b3bc472
 
 
 
 
 
 
 
 
 
d7485e8
c0ade28
432c28d
 
 
d7485e8
c0ade28
13e8889
d7485e8
 
20d6d68
 
0f4d5d5
cce90fc
 
97f7d3e
20d6d68
b87bcef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import streamlit as st
import PyPDF2
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
from abstractive_summarization import summarize_with_bart_cnn, summarize_with_bart_ft, summarize_with_led, summarize_with_t5
from keyword_extraction import extract_keywords
from keyphrase_extraction import extract_sentences_with_obligations
#from blanc import BlancHelp

# Use Streamlit's "wide" layout so the page content spans the full browser
# width. This is issued at import time, before main() emits any other
# Streamlit command.
# NOTE(review): older Streamlit releases require set_page_config to be the
# first Streamlit call on a page -- keep it ahead of any other st.* usage.
st.set_page_config(layout="wide")

# Function to handle file upload and return its content
def load_pdf(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts as its source; the app
            passes the object returned by the Streamlit file uploader.

    Returns:
        The per-page ``extract_text()`` results concatenated in page order
        with no separator. A page whose extraction yields a falsy value
        (``None`` or ``""``) contributes nothing.
    """
    reader = PyPDF2.PdfReader(file)
    # `or ""` guards against pages for which extract_text() returns None.
    return "".join(page.extract_text() or "" for page in reader.pages)

# Main app
def main():
    """Render the Terms of Service Summarizer page.

    The page has three columns:

    * left   -- radio buttons selecting the summarization / extraction method;
    * middle -- a text area and a PDF uploader plus the "Summarize" button;
    * right  -- the most recent result.

    When "Summarize" is pressed, exactly one input source (typed text or an
    uploaded PDF) must be provided. The selected method is applied to that
    text and the result is stored in ``st.session_state.summary``, which the
    right column displays whenever it is present.

    Returns:
        None. On an input validation warning the function returns early, so
        the right column is not rendered for that run.
    """
    st.title("Terms of Service Summarizer")

    # Single source of truth for both the radio labels and the function each
    # label runs. Deriving the options from this mapping keeps the two from
    # drifting apart (a mistyped label previously left the LED option
    # without any effect). Insertion order is the on-screen order.
    summarizers = {
        "Abstractive (T5)": summarize_with_t5,
        "Abstractive (LED)": summarize_with_led,
        "Abstractive (Fine-tuned BART)": summarize_with_bart_ft,
        "Abstractive (BART-large-CNN)": summarize_with_bart_cnn,
        "Extractive (TextRank)": summarize_with_textrank,
        "Extractive (Latent Semantic Analysis)": summarize_with_lsa,
        "Keyphrase Extraction (RAKE)": extract_sentences_with_obligations,
        "Keyword Extraction (RAKE)": extract_keywords,
    }

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for the summarizer choice
    with col1:
        help_text = (
            "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. "
            "It uses a fine-tuned model on BART-large-CNN.<br>"
            "Extractive: Extractive summarization selects and extracts sentences or phrases directly from the original text to create a summary using the TextRank algorithm.<br>"
            "Keyword Extraction: Keyword extraction identifies and extracts important keywords or terms from the text using the Rake algorithm. "
            "These keywords can be used for various purposes such as content analysis and SEO.<br>"
            "Keyphrase Extraction: Keyphrase extraction is similar to keyword extraction but focuses on identifying multi-word phrases or expressions that are significant in the text using the Rake algorithm."
        )

        radio_selection = st.radio(
            "Choose type of summarizer:", list(summarizers), help=help_text
        )

    # Middle column: text input, file uploader and the trigger button
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")
        if st.button("Summarize"):
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            if uploaded_file:
                # Extract text from the PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # A PDF without a text layer (or whitespace-only input) gives the
            # models nothing to work with; report it instead of running them.
            if not file_content.strip():
                st.warning("No text could be extracted from the input.")
                return

            # Run the method chosen in the left column and remember its
            # output for display in the right column.
            st.session_state.summary = summarizers[radio_selection](file_content)

    # Right column: show the stored result, if any
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)

# Build the page only when this file is executed as a script; importing the
# module does not call main().
if __name__ == "__main__":
    main()