mohitmayank committed on
Commit
f9583a4
·
1 Parent(s): 81b2b07
Files changed (2) hide show
  1. app.py +39 -12
  2. requirements.txt +1 -1
app.py CHANGED
@@ -4,41 +4,68 @@ import streamlit as st
4
  from newspaper import Article
5
  from transformers import pipeline
6
 
 
7
  st.set_page_config(layout="wide", page_title="SummarizeLink")
8
 
9
- # load the summarization model
10
  @st.cache(allow_output_mutation=True)
11
  def load_summarize_model():
12
  # model = pipeline("summarization", model='sshleifer/distilbart-cnn-12-6')
13
  model = pipeline("summarization")
14
  return model
 
 
15
  summ = load_summarize_model()
16
 
17
- # define functions
18
  def download_and_parse_article(url):
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  article = Article(url)
 
20
  article.download()
21
  article.parse()
 
22
  return article.text
23
 
24
- # define the app
 
25
  st.title("SummarizeLink")
26
- st.text("Paste any article link below and click on the 'Summarize Text' button to get the summarized data")
27
- # st.subheader("This application is using HuggingFace's transformers pre-trained model for text summarization.")
 
28
  link = st.text_area('Paste your link here...', "https://towardsdatascience.com/a-guide-to-the-knowledge-graphs-bfb5c40272f1", height=50)
29
  button = st.button("Summarize")
30
- max_lengthy = st.sidebar.slider('Max summary length', min_value=30, max_value=700, value=100, step=10)
31
- # num_beamer = st.sidebar.slider('Speed vs quality of Summary (1 is fastest but less accurate)', min_value=1, max_value=8, value=4, step=1)
32
- with st.spinner("Summarizing..."):
 
 
 
33
  if button and link:
34
- text = download_and_parse_article(link) # get the text
 
 
35
  summary = summ(text,
36
  truncation=True,
37
- max_length = max_lengthy,
38
- min_length = 50,
39
- num_beams=5,
40
  do_sample=True,
41
  early_stopping=True,
42
  repetition_penalty=1.5,
43
  length_penalty=1.5)[0]
 
 
44
  st.write(summary['summary_text'])
 
4
  from newspaper import Article
5
  from transformers import pipeline
6
 
7
+ # set config
8
  st.set_page_config(layout="wide", page_title="SummarizeLink")
9
 
10
+ # load the summarization model (cache for faster loading)
11
  @st.cache(allow_output_mutation=True)
12
  def load_summarize_model():
13
  # model = pipeline("summarization", model='sshleifer/distilbart-cnn-12-6')
14
  model = pipeline("summarization")
15
  return model
16
+
17
+ # loading the model
18
  summ = load_summarize_model()
19
 
20
+ # define the download-and-parse helper function
21
  def download_and_parse_article(url):
22
+ """Downloads and parses an article from a URL.
23
+
24
+ Parameters
25
+ ----------
26
+ url : str
27
+ The URL of the article to download and parse.
28
+
29
+ Returns
30
+ -------
31
+ text : str
32
+ The text of the downloaded and parsed article.
33
+ """
34
+ # define the article
35
  article = Article(url)
36
+ # download and parse the article
37
  article.download()
38
  article.parse()
39
+ # return the article text
40
  return article.text
41
 
42
+ # APP
43
+ # set title and subtitle
44
  st.title("SummarizeLink")
45
+ st.markdown("Paste any article link below and click on the 'Summarize' button.")
46
+ st.markdown("*Note:* We truncate the text incase the article is lengthy! 🖖")
47
+ # create the input text box and setting panel
48
  link = st.text_area('Paste your link here...', "https://towardsdatascience.com/a-guide-to-the-knowledge-graphs-bfb5c40272f1", height=50)
49
  button = st.button("Summarize")
50
+ min_length = st.sidebar.slider('Min summary length', min_value=10, max_value=100, value=50, step=10)
51
+ max_length = st.sidebar.slider('Max summary length', min_value=30, max_value=700, value=100, step=10)
52
+ num_beams = st.sidebar.slider('Beam length', min_value=1, max_value=10, value=5, step=1)
53
+
54
+ # if button is clicked
55
+ with st.spinner("Parsing article and Summarizing..."):
56
  if button and link:
57
+ # get the text
58
+ text = download_and_parse_article(link)
59
+ # summarize the text
60
  summary = summ(text,
61
  truncation=True,
62
+ max_length = max_length,
63
+ min_length = min_length,
64
+ num_beams=num_beams,
65
  do_sample=True,
66
  early_stopping=True,
67
  repetition_penalty=1.5,
68
  length_penalty=1.5)[0]
69
+ # display the summary
70
+ st.markdown("**Summary:**")
71
  st.write(summary['summary_text'])
requirements.txt CHANGED
@@ -1,2 +1,2 @@
1
  streamlit
2
- newspaper
 
1
  streamlit
2
+ newspaper3k