Gladiator committed on
Commit
121b578
1 Parent(s): 0c2753a

integrate summarization pipeline

Files changed (1)
  1. app.py +16 -21
app.py CHANGED
@@ -2,7 +2,7 @@ import torch
 import nltk
 import validators
 import streamlit as st
-from transformers import T5Tokenizer, T5ForConditionalGeneration
+from transformers import pipeline, T5Tokenizer
 
 # local modules
 from extractive_summarizer.model_processors import Summarizer
@@ -12,12 +12,12 @@ from src.abstractive_summarizer import (
     preprocess_text_for_abstractive_summarization,
 )
 
-# abstractive summarizer model
-@st.cache()
-def load_abs_model():
-    tokenizer = T5Tokenizer.from_pretrained("t5-base")
-    model = T5ForConditionalGeneration.from_pretrained("t5-base")
-    return tokenizer, model
+# # abstractive summarizer model
+# @st.cache()
+# def load_abs_model():
+#     tokenizer = T5Tokenizer.from_pretrained("t5-base")
+#     model = T5ForConditionalGeneration.from_pretrained("t5-base")
+#     return tokenizer, model
 
 
 if __name__ == "__main__":
@@ -28,7 +28,13 @@ if __name__ == "__main__":
     summarize_type = st.sidebar.selectbox(
         "Summarization type", options=["Extractive", "Abstractive"]
     )
+    # ---------------------------
+    # SETUP
     nltk.download("punkt")
+    abs_tokenizer_name = "t5-base"
+    abs_model_name = "t5-base"
+    abs_tokenizer = T5Tokenizer.from_pretrained(abs_tokenizer_name)
+    # ---------------------------
 
     inp_text = st.text_input("Enter text or a url here")
 
@@ -65,26 +71,15 @@ if __name__ == "__main__":
             text="Creating abstractive summary. This might take a few seconds ..."
         ):
             text_to_summarize = clean_txt
-            abs_tokenizer, abs_model = load_abs_model()
+            abs_summarizer = pipeline("summarization")
             if not is_url:
                 # list of chunks
                 text_to_summarize = preprocess_text_for_abstractive_summarization(
                     tokenizer=abs_tokenizer, text=clean_txt
                 )
-            summarized_text = abstractive_summarizer(
-                abs_tokenizer, abs_model, text_to_summarize
-            )
+            tmp_sum = abs_summarizer(text_to_summarize, do_sample=False)
 
-            # abs_tokenizer, abs_model = load_abs_model()
-            # summarized_text = abstractive_summarizer(
-            #     abs_tokenizer, abs_model, text_to_summarize
-            # )
-            # elif summarize_type == "Abstractive" and is_url:
-            #     abs_url_summarizer = pipeline("summarization")
-            #     tmp_sum = abs_url_summarizer(
-            #         text_to_summarize, max_length=120, min_length=30, do_sample=False
-            #     )
-            #     summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
+            summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
 
     # final summarized output
     st.subheader("Summarized text")
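
For reference, the change swaps the cached T5 model for the default transformers summarization pipeline and joins the per-chunk summaries. A minimal standalone sketch of the new flow, assuming transformers is installed; the chunk list and print call are illustrative and not part of app.py:

from transformers import pipeline

# default Hugging Face summarization pipeline (downloads its default checkpoint)
abs_summarizer = pipeline("summarization")

# stand-in for the chunked, cleaned app input (illustrative only)
chunks = [
    "First chunk of the cleaned input text ...",
    "Second chunk ...",
]

# the pipeline returns one {"summary_text": ...} dict per input chunk
tmp_sum = abs_summarizer(chunks, do_sample=False)
summarized_text = " ".join(summ["summary_text"] for summ in tmp_sum)
print(summarized_text)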