EmreYY20 committed on
Commit
97f7d3e
1 Parent(s): 2a61e91

add metric

Browse files
Files changed (2) hide show
  1. app.py +27 -1
  2. extractive_model.py +2 -0
app.py CHANGED
@@ -1,6 +1,8 @@
1
  import streamlit as st
2
  import PyPDF2
3
- from extractive_model import summarize_with_textrank # Renamed function
 
 
4
 
5
  # Set page to wide mode
6
  st.set_page_config(layout="wide")
@@ -13,6 +15,14 @@ def load_pdf(file):
13
  pdf_text += pdf_reader.pages[page_num].extract_text() or ""
14
  return pdf_text
15
 
 
 
 
 
 
 
 
 
16
  # Main app
17
  def main():
18
  st.title("Terms of Service Summarizer")
@@ -33,6 +43,12 @@ def main():
33
  if uploaded_file and user_input:
34
  st.warning("Please provide either text input or a PDF file, not both.")
35
  return
 
 
 
 
 
 
36
  elif uploaded_file:
37
  # Extract text from PDF
38
  file_content = load_pdf(uploaded_file)
@@ -48,11 +64,21 @@ def main():
48
  summary = summarize_with_textrank(file_content)
49
  st.session_state.summary = summary
50
 
 
 
 
 
 
 
51
  # Right column: Displaying text after pressing 'Summarize'
52
  with col3:
53
  st.write("Summary:")
54
  if 'summary' in st.session_state:
55
  st.write(st.session_state.summary)
56
 
 
 
 
 
57
  if __name__ == "__main__":
58
  main()
 
1
  import streamlit as st
2
  import PyPDF2
3
+ from extractive_model import summarize_with_textrank
4
+ from nltk.tokenize import sent_tokenize
5
+
6
 
7
  # Set page to wide mode
8
  st.set_page_config(layout="wide")
 
15
  pdf_text += pdf_reader.pages[page_num].extract_text() or ""
16
  return pdf_text
17
 
18
+ # Function to calculate overlap
19
def calculate_overlap(original_text, summary_text):
    """Return the share of original sentences found verbatim in the summary.

    Both texts are split into sentences with ``sent_tokenize`` and
    de-duplicated. The count of distinct summary sentences that also occur
    in the original is divided by the count of distinct original sentences
    and scaled to a percentage. Returns 0 when the original yields no
    sentences.
    """
    source_sentences = set(sent_tokenize(original_text))
    shared_sentences = set(sent_tokenize(summary_text)) & source_sentences

    # Guard against division by zero when the original has no sentences.
    if not source_sentences:
        return 0
    return (len(shared_sentences) / len(source_sentences)) * 100
25
+
26
  # Main app
27
  def main():
28
  st.title("Terms of Service Summarizer")
 
43
  if uploaded_file and user_input:
44
  st.warning("Please provide either text input or a PDF file, not both.")
45
  return
46
+
47
+ # Perform overlap calculation
48
+ if 'summary' in st.session_state:
49
+ overlap = calculate_overlap(file_content, st.session_state.summary)
50
+ st.session_state.overlap = overlap
51
+
52
  elif uploaded_file:
53
  # Extract text from PDF
54
  file_content = load_pdf(uploaded_file)
 
64
  summary = summarize_with_textrank(file_content)
65
  st.session_state.summary = summary
66
 
67
+ # Perform abstractive summarization (placeholder: not implemented yet)
68
+ if radio_selection == "Abstractive":
69
+ None
70
+ #summary = summarize_with_textrank(file_content)
71
+ #st.session_state.summary = summary
72
+
73
  # Right column: Displaying text after pressing 'Summarize'
74
  with col3:
75
  st.write("Summary:")
76
  if 'summary' in st.session_state:
77
  st.write(st.session_state.summary)
78
 
79
+ # Display overlap percentage
80
+ if 'overlap' in st.session_state:
81
+ st.write(f"Overlap with Original Text: {st.session_state.overlap:.2f}%")
82
+
83
  if __name__ == "__main__":
84
  main()
extractive_model.py CHANGED
@@ -12,6 +12,8 @@ from sumy.utils import get_stop_words"""
12
  from sumy.parsers.plaintext import PlaintextParser
13
  from sumy.nlp.tokenizers import Tokenizer
14
  from sumy.summarizers.text_rank import TextRankSummarizer
 
 
15
  import nltk
16
  nltk.download('punkt')
17
 
 
12
  from sumy.parsers.plaintext import PlaintextParser
13
  from sumy.nlp.tokenizers import Tokenizer
14
  from sumy.summarizers.text_rank import TextRankSummarizer
15
+ from sumy.summarizers.lsa import LsaSummarizer
16
+ from sumy.summarizers.lex_rank import LexRankSummarizer
17
  import nltk
18
  nltk.download('punkt')
19