integrate summarization pipeline
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import torch
|
|
2 |
import nltk
|
3 |
import validators
|
4 |
import streamlit as st
|
5 |
-
from transformers import
|
6 |
|
7 |
# local modules
|
8 |
from extractive_summarizer.model_processors import Summarizer
|
@@ -12,12 +12,12 @@ from src.abstractive_summarizer import (
|
|
12 |
preprocess_text_for_abstractive_summarization,
|
13 |
)
|
14 |
|
15 |
-
# abstractive summarizer model
|
16 |
-
@st.cache()
|
17 |
-
def load_abs_model():
|
18 |
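-
tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
19 |
-
model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
20 |
-
return tokenizer, model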
|
21 |
|
22 |
|
23 |
if __name__ == "__main__":
|
@@ -28,7 +28,13 @@ if __name__ == "__main__":
|
|
28 |
summarize_type = st.sidebar.selectbox(
|
29 |
"Summarization type", options=["Extractive", "Abstractive"]
|
30 |
)
|
|
|
|
|
31 |
nltk.download("punkt")
|
|
|
|
|
|
|
|
|
32 |
|
33 |
inp_text = st.text_input("Enter text or a url here")
|
34 |
|
@@ -65,26 +71,15 @@ if __name__ == "__main__":
|
|
65 |
text="Creating abstractive summary. This might take a few seconds ..."
|
66 |
):
|
67 |
text_to_summarize = clean_txt
|
68 |
-
|
69 |
if not is_url:
|
70 |
# list of chunks
|
71 |
text_to_summarize = preprocess_text_for_abstractive_summarization(
|
72 |
tokenizer=abs_tokenizer, text=clean_txt
|
73 |
)
|
74 |
-
summarized_text = abstractive_summarizer(
|
75 |
-
abs_tokenizer, abs_model, text_to_summarize
|
76 |
-
)
|
77 |
|
78 |
-
|
79 |
-
# summarized_text = abstractive_summarizer(
|
80 |
-
# abs_tokenizer, abs_model, text_to_summarize
|
81 |
-
# )
|
82 |
-
# elif summarize_type == "Abstractive" and is_url:
|
83 |
-
# abs_url_summarizer = pipeline("summarization")
|
84 |
-
# tmp_sum = abs_url_summarizer(
|
85 |
-
# text_to_summarize, max_length=120, min_length=30, do_sample=False
|
86 |
-
# )
|
87 |
-
# summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
|
88 |
|
89 |
# final summarized output
|
90 |
st.subheader("Summarized text")
|
|
|
2 |
import nltk
|
3 |
import validators
|
4 |
import streamlit as st
|
5 |
+
from transformers import pipeline, T5Tokenizer
|
6 |
|
7 |
# local modules
|
8 |
from extractive_summarizer.model_processors import Summarizer
|
|
|
12 |
preprocess_text_for_abstractive_summarization,
|
13 |
)
|
14 |
|
15 |
+
# # abstractive summarizer model
|
16 |
+
# @st.cache()
|
17 |
+
# def load_abs_model():
|
18 |
+
# tokenizer = T5Tokenizer.from_pretrained("t5-base")
|
19 |
+
# model = T5ForConditionalGeneration.from_pretrained("t5-base")
|
20 |
+
# return tokenizer, model
|
21 |
|
22 |
|
23 |
if __name__ == "__main__":
|
|
|
28 |
summarize_type = st.sidebar.selectbox(
|
29 |
"Summarization type", options=["Extractive", "Abstractive"]
|
30 |
)
|
31 |
+
# ---------------------------
|
32 |
+
# SETUP
|
33 |
nltk.download("punkt")
|
34 |
+
abs_tokenizer_name = "t5-base"
|
35 |
+
abs_model_name = "t5-base"
|
36 |
+
abs_tokenizer = T5Tokenizer.from_pretrained(abs_tokenizer_name)
|
37 |
+
# ---------------------------
|
38 |
|
39 |
inp_text = st.text_input("Enter text or a url here")
|
40 |
|
|
|
71 |
text="Creating abstractive summary. This might take a few seconds ..."
|
72 |
):
|
73 |
text_to_summarize = clean_txt
|
74 |
+
abs_summarizer = pipeline("summarization")
|
75 |
if not is_url:
|
76 |
# list of chunks
|
77 |
text_to_summarize = preprocess_text_for_abstractive_summarization(
|
78 |
tokenizer=abs_tokenizer, text=clean_txt
|
79 |
)
|
80 |
+
tmp_sum = abs_summarizer(text_to_summarize, do_sample=False)
|
|
|
|
|
81 |
|
82 |
+
summarized_text = " ".join([summ["summary_text"] for summ in tmp_sum])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# final summarized output
|
85 |
st.subheader("Summarized text")
|