import streamlit as st
import PyPDF2

from extractive_summarization import summarize_with_textrank, summarize_with_lsa
from abstractive_summarization import (
    summarize_with_bart_cnn,
    summarize_with_bart_ft,
    summarize_with_led,
    summarize_with_t5,
)
from keyword_extraction import extract_keywords
from keyphrase_extraction import extract_sentences_with_obligations
#from blanc import BlancHelp

# Set page to wide mode
st.set_page_config(layout="wide")

# Single source of truth for the radio button labels and the function each
# one runs. The radio options are derived from this mapping (insertion order
# is the display order), so a label can never drift out of sync with its
# handler.
SUMMARIZERS = {
    "Abstractive (T5)": summarize_with_t5,
    "Abstractive (LED)": summarize_with_led,
    "Abstractive (Fine-tuned BART)": summarize_with_bart_ft,
    "Abstractive (BART-large-CNN)": summarize_with_bart_cnn,
    "Extractive (TextRank)": summarize_with_textrank,
    "Extractive (Latent Semantic Analysis)": summarize_with_lsa,
    "Keyphrase Extraction (RAKE)": extract_sentences_with_obligations,
    "Keyword Extraction (RAKE)": extract_keywords,
}

# Tooltip shown next to the summarizer radio group.
HELP_TEXT = (
    "Abstractive: Abstractive summarization generates a summary that may contain words not present in the original text. "
    "It uses a fine-tuned model on BART-large-CNN.\n"
    "Extractive: Extractive summarization selects and extracts sentences or phrases directly from the original text to create a summary using the TextRank algorithm.\n"
    "Keyword Extraction: Keyword extraction identifies and extracts important keywords or terms from the text using the Rake algorithm. "
    "These keywords can be used for various purposes such as content analysis and SEO.\n"
    "Keyphrase Extraction: Keyphrase extraction is similar to keyword extraction but focuses on identifying multi-word phrases or expressions that are significant in the text using the Rake algorithm."
)


def load_pdf(file) -> str:
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Anything ``PyPDF2.PdfReader`` accepts (here, the object
            returned by ``st.file_uploader``).

    Returns:
        The concatenated page text. A page whose ``extract_text()`` returns
        a falsy value contributes an empty string, so the result may be
        empty.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def main() -> None:
    """Render the three-column summarizer page and run the chosen summarizer.

    The left column selects the summarizer, the middle column collects the
    input (pasted text or an uploaded PDF, but not both) and the right
    column shows the most recent result stored in ``st.session_state``.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: Radio buttons for summarizer choice
    with col1:
        radio_selection = st.radio(
            "Choose type of summarizer:",
            list(SUMMARIZERS),
            help=HELP_TEXT,
        )

    # Middle column: Text input and File uploader
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            elif uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                if not file_content.strip():
                    # No extractable text; do not pass an empty string to
                    # the summarizer.
                    st.warning("No extractable text was found in the uploaded PDF.")
                    return
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # Run whichever summarizer / extractor the user selected and keep
            # the result in session state so it survives Streamlit reruns.
            summarize = SUMMARIZERS[radio_selection]
            st.session_state.summary = summarize(file_content)

    # Right column: Displaying text after pressing 'Summarize'
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)


if __name__ == "__main__":
    main()