import streamlit as st
import PyPDF2
from extractive_model import summarize_with_textrank
from nltk.tokenize import sent_tokenize

# Set page to wide mode
st.set_page_config(layout="wide")


def load_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: Whatever ``PyPDF2.PdfReader`` accepts; ``main`` passes the
            object returned by ``st.file_uploader``.

    Returns:
        A single string with the extracted text of all pages. Pages for
        which ``extract_text()`` returns a falsy value contribute nothing.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # ``or ""`` guards against pages whose extraction yields None/empty.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def calculate_overlap(original_text, summary_text):
    """Return the percentage of original sentences that appear in the summary.

    Both texts are split with ``sent_tokenize`` and de-duplicated; a sentence
    counts as overlapping only when it matches exactly.

    Args:
        original_text: The full source text.
        summary_text: The summary text to compare against the source.

    Returns:
        ``overlap_count / number_of_unique_original_sentences * 100``, or
        ``0`` when the original text contains no sentences.
    """
    original_sentences = set(sent_tokenize(original_text))
    summary_sentences = set(sent_tokenize(summary_text))
    if not original_sentences:
        # Avoid dividing by zero for empty input.
        return 0
    overlap_count = len(summary_sentences & original_sentences)
    return (overlap_count / len(original_sentences)) * 100


def main():
    """Render the summarizer UI and handle the 'Summarize' action.

    Layout is three columns: summarizer choice (left), text/PDF input
    (middle) and the resulting summary plus overlap percentage (right).
    Results are kept in ``st.session_state`` under ``summary`` and
    ``overlap``.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: Radio buttons for summarizer choice
    with col1:
        radio_options = ['Abstractive', 'Extractive']
        radio_selection = st.radio("Choose type of summarizer:", radio_options)

    # Middle column: Text input and File uploader
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            # Resolve the single source of text, rejecting ambiguous or
            # missing input before doing any work.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return
            elif uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            # Perform extractive summarization
            if radio_selection == "Extractive":
                summary = summarize_with_textrank(file_content)
                st.session_state.summary = summary
                # Compute the overlap here, once both the source text and the
                # fresh summary exist, so the displayed figure always matches
                # the displayed summary.
                # NOTE(review): assumes summarize_with_textrank returns a
                # string that sent_tokenize can split - confirm.
                st.session_state.overlap = calculate_overlap(file_content, summary)

            # TODO: abstractive summarization is not implemented yet; choosing
            # 'Abstractive' leaves any previously stored summary untouched.

    # Right column: Displaying text after pressing 'Summarize'
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)

        # Display overlap percentage
        if 'overlap' in st.session_state:
            st.write(f"Overlap with Original Text: {st.session_state.overlap:.2f}%")


if __name__ == "__main__":
    main()