EmreYY20
add metric
97f7d3e
raw
history blame
No virus
3.11 kB
import streamlit as st
import PyPDF2
from extractive_model import summarize_with_textrank
from nltk.tokenize import sent_tokenize
# Use Streamlit's wide page layout so the three-column UI built in main()
# is not squeezed into the default centered layout.
st.set_page_config(layout="wide")
def load_pdf(file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file: The PDF source handed to ``PyPDF2.PdfReader``; the app passes
            the object returned by ``st.file_uploader``.

    Returns:
        str: The extracted text of all pages joined without a separator.
        A page whose ``extract_text()`` result is falsy (``None`` or an
        empty string) contributes nothing.
    """
    pdf_reader = PyPDF2.PdfReader(file)
    # Iterate the pages directly and join once, instead of indexing by
    # position and growing a string with ``+=`` on every page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
def calculate_overlap(original_text, summary_text):
    """Return the percentage of original sentences that reappear in the summary.

    Both texts are split with ``sent_tokenize`` and de-duplicated. A sentence
    counts as shared only when the summary contains exactly the same string.

    Args:
        original_text: The full source text.
        summary_text: The summary produced from ``original_text``.

    Returns:
        float: ``100 * shared / distinct_original``, where ``shared`` is the
        number of distinct sentences present in both texts and
        ``distinct_original`` is the number of distinct sentences in
        ``original_text``. ``0.0`` when the original has no sentences.
    """
    original_sentences = set(sent_tokenize(original_text))
    summary_sentences = set(sent_tokenize(summary_text))

    # Guard the division; always return a float so callers see one type.
    if not original_sentences:
        return 0.0

    shared_sentences = original_sentences & summary_sentences
    return (len(shared_sentences) / len(original_sentences)) * 100
def main():
    """Render the summarizer page and handle a click on "Summarize".

    The page has three columns: a radio button selecting the summarizer
    type, the text/PDF inputs together with the "Summarize" button, and the
    output. When the button is pressed with exactly one input provided and
    "Extractive" selected, the TextRank summary and its overlap with the
    input are stored in ``st.session_state`` under the keys ``summary`` and
    ``overlap`` and then shown in the output column.

    If both inputs or neither input is provided, a warning is shown and the
    function returns before the output column is rendered.
    """
    st.title("Terms of Service Summarizer")

    # Layout: 3 columns
    col1, col2, col3 = st.columns([1, 3, 2], gap="large")

    # Left column: radio buttons for summarizer choice
    with col1:
        radio_options = ['Abstractive', 'Extractive']
        radio_selection = st.radio("Choose type of summarizer:", radio_options)

    # Middle column: text input and file uploader
    with col2:
        user_input = st.text_area("Enter your text here:")
        uploaded_file = st.file_uploader("Upload a PDF", type="pdf")

        if st.button("Summarize"):
            # Exactly one input source must be supplied.
            if uploaded_file and user_input:
                st.warning("Please provide either text input or a PDF file, not both.")
                return

            if uploaded_file:
                # Extract text from PDF
                file_content = load_pdf(uploaded_file)
                st.write("PDF uploaded successfully.")
            elif user_input:
                file_content = user_input
            else:
                st.warning("Please upload a PDF or enter some text to summarize.")
                return

            if radio_selection == "Extractive":
                summary = summarize_with_textrank(file_content)
                st.session_state.summary = summary
                # Compute the overlap here, where both the source text and
                # the fresh summary exist. Previously this step sat after a
                # ``return`` and read ``file_content`` before assignment, so
                # the metric was never stored or displayed.
                # NOTE(review): assumes summarize_with_textrank returns a
                # string that sent_tokenize can split - confirm.
                st.session_state.overlap = calculate_overlap(file_content, summary)
            # TODO: "Abstractive" has no summarizer wired in yet; selecting
            # it generates nothing, so a summary and overlap stored by an
            # earlier run remain in the session state and stay on screen.

    # Right column: output shown after pressing "Summarize"
    with col3:
        st.write("Summary:")
        if 'summary' in st.session_state:
            st.write(st.session_state.summary)
            # Display overlap percentage
            if 'overlap' in st.session_state:
                st.write(f"Overlap with Original Text: {st.session_state.overlap:.2f}%")
# Entry point: build the page only when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()