yassTrad committed on
Commit
9433e8d
1 Parent(s): 4f3e9b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -3,6 +3,7 @@ from goose3 import Goose
3
  from fake_useragent import UserAgent
4
  from bs4 import BeautifulSoup
5
  from transformers import pipeline
 
6
  #from newsplease import NewsPlease
7
  import validators
8
  import streamlit as st
@@ -21,7 +22,15 @@ def article_text_extractor(url: str):
21
  return text
22
 
23
 
24
-
 
 
 
 
 
 
 
 
25
 
26
  @st.cache(allow_output_mutation=True)
27
  def extractive_model():
@@ -77,13 +86,13 @@ if is_url:
77
  summarize = st.button("Summarize")
78
 
79
  if summarize:
80
- text_to_summarize = clean_text if is_url else plain_text
81
 
82
  with st.spinner(text="Loading Model and creating summary. This might take a few seconds depending on the length of your text..."):
83
  model = model()
84
  #summarized_text = text_to_summarize if len(text_to_summarize) > 60 else ''.join(model(body, min_length=60))
85
- min_ = min(150,len(text_to_summarize)/5)
86
- max_ = min(800,len(text_to_summarize)/2)
87
  summarized_text = ''.join(model(text_to_summarize, min_length=min_,max_length=max_)) if summary_type == "Extractive" else model(text_to_summarize, min_length=min_,max_length=max_)[0]['summary_text']
88
 
89
  st.subheader("Original text")
 
3
  from fake_useragent import UserAgent
4
  from bs4 import BeautifulSoup
5
  from transformers import pipeline
6
+ import re
7
  #from newsplease import NewsPlease
8
  import validators
9
  import streamlit as st
 
22
  return text
23
 
24
 
25
+ def preprocess_text(x):
26
+ x = x.encode("ascii", "ignore").decode() # drop non-ASCII characters
27
+ x = re.sub(r"https*\S+", " ", x) # url
28
+ x = re.sub(r"@\S+", " ", x) # mentions
29
+ x = re.sub(r"#\S+", " ", x) # hashtags
30
+ x = re.sub(r"\s{2,}", " ", x) # collapse repeated whitespace
31
+ x = re.sub("[^.,!?A-Za-z0-9]+", " ", x) # special characters except .,!?
32
+
33
+ return x
34
 
35
  @st.cache(allow_output_mutation=True)
36
  def extractive_model():
 
86
  summarize = st.button("Summarize")
87
 
88
  if summarize:
89
+ text_to_summarize = preprocess_text(clean_text) if is_url else preprocess_text(plain_text)
90
 
91
  with st.spinner(text="Loading Model and creating summary. This might take a few seconds depending on the length of your text..."):
92
  model = model()
93
  #summarized_text = text_to_summarize if len(text_to_summarize) > 60 else ''.join(model(body, min_length=60))
94
+ min_ = min(100,len(text_to_summarize)/5)
95
+ max_ = min(600,len(text_to_summarize)/2)
96
  summarized_text = ''.join(model(text_to_summarize, min_length=min_,max_length=max_)) if summary_type == "Extractive" else model(text_to_summarize, min_length=min_,max_length=max_)[0]['summary_text']
97
 
98
  st.subheader("Original text")