Spaces:
Runtime error
Runtime error
File size: 4,513 Bytes
c0ae847 03ddbfd 50d3158 a4d0721 432c28d 4b1dcc8 cce90fc 46193fd 20d6d68 03ddbfd 20d6d68 03ddbfd 20d6d68 1918f01 ffc170d 1918f01 3dd36dd bfd0b51 20d6d68 b87bcef 20d6d68 2a61e91 03ddbfd 20d6d68 b87bcef 20d6d68 2a61e91 b87bcef 94d59bf f497f7f 2a61e91 20d6d68 94d59bf 97f7d3e da1f55e 1918f01 da1f55e ccd2173 97f7d3e d7485e8 c0ade28 432c28d d7485e8 c0ade28 13e8889 d7485e8 20d6d68 0f4d5d5 cce90fc 97f7d3e 20d6d68 b87bcef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import streamlit as st
import PyPDF2
from extractive_summarization import summarize_with_textrank, summarize_with_lsa
from abstractive_summarization import summarize_with_bart, summarize_with_bart_ft
from keyword_extraction import extract_keywords
from keyphrase_extraction import extract_sentences_with_obligations
#from blanc import BlancHelp
# Configure the Streamlit page to use the "wide" layout so the three-column
# UI built in main() has the full browser width available.
st.set_page_config(layout="wide")
def load_pdf(file):
    """Return the text of every page of a PDF as one string.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts; ``main()`` passes the
            object returned by ``st.file_uploader``.

    Returns:
        The per-page text joined with no separator. A page whose
        ``extract_text()`` yields a falsy value contributes an empty string,
        so the result may be ``""``.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate the pages directly and join once instead of indexing by
    # position and growing the string with repeated ``+=``.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def main():
    """Render the Terms of Service summarizer page.

    The page has three columns:

    * left   - radio buttons selecting the summarization / extraction method;
    * middle - a text area, a PDF uploader and the "Summarize" button;
    * right  - the most recent result stored in ``st.session_state.summary``.

    When the button is pressed, exactly one input source (typed text or an
    uploaded PDF) must be provided; otherwise a warning is shown and the
    function returns early. The selected method is then applied to the text
    and its result is stored in the session state.
    """
    st.title("Terms of Service Summarizer")

    # Single source of truth: radio label -> function applied to the text.
    # The radio options are generated from these keys (insertion order is the
    # display order), so a label can never drift out of sync with its handler.
    summarizers = {
        'Abstractive (Fine-tuned BART)': summarize_with_bart_ft,
        "Abstractive (BART-large-CNN)": summarize_with_bart,
        'Extractive (TextRank)': summarize_with_textrank,
        "Extractive (Latent Semantic Analysis)": summarize_with_lsa,
        'Keyphrase Extraction (RAKE)': extract_sentences_with_obligations,
        'Keyword Extraction (RAKE)': extract_keywords,
    }

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for summarizer choice
    with col1:
        radio_options = list(summarizers)
        help_text = (
            "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. "
            "It uses a fine-tuned model on BART-large-CNN.<br>"
            "Extractive: Extractive summarization selects and extracts sentences or phrases directly from the original text to create a summary using the TextRank algorithm.<br>"
            "Keyword Extraction: Keyword extraction identifies and extracts important keywords or terms from the text using the Rake algorithm. "
            "These keywords can be used for various purposes such as content analysis and SEO.<br>"
            "Keyphrase Extraction: Keyphrase extraction is similar to keyword extraction but focuses on identifying multi-word phrases or expressions that are significant in the text using the Rake algorithm."
        )
        radio_selection = st.radio("Choose type of summarizer:", radio_options, help=help_text)

    # Middle column: text input, file uploader and the trigger button
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            elif uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # A PDF without a text layer (or whitespace-only input) yields no
            # usable text; warn instead of handing an empty string to a model.
            if not file_content.strip():
                st.warning("No text could be extracted from the provided input.")
                return

            # Run the selected method. ``.get`` keeps the original behaviour
            # of doing nothing if the selection matches no known label.
            summarize = summarizers.get(radio_selection)
            if summarize is not None:
                st.session_state.summary = summarize(file_content)

    # Right column: show the latest result kept in the session state
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)
# Script entry point: build the page only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()
|