Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from goose3 import Goose
|
|
3 |
from fake_useragent import UserAgent
|
4 |
from bs4 import BeautifulSoup
|
5 |
from transformers import pipeline
|
|
|
6 |
#from newsplease import NewsPlease
|
7 |
import validators
|
8 |
import streamlit as st
|
@@ -21,7 +22,15 @@ def article_text_extractor(url: str):
|
|
21 |
return text
|
22 |
|
23 |
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
|
26 |
@st.cache(allow_output_mutation=True)
|
27 |
def extractive_model():
|
@@ -77,13 +86,13 @@ if is_url:
|
|
77 |
summarize = st.button("Summarize")
|
78 |
|
79 |
if summarize:
|
80 |
-
text_to_summarize = clean_text if is_url else plain_text
|
81 |
|
82 |
with st.spinner(text="Loading Model and creating summary. This might take a few seconds depending on the length of your text..."):
|
83 |
model = model()
|
84 |
#summarized_text = text_to_summarize if len(text_to_summarize) > 60 else ''.join(model(body, min_length=60))
|
85 |
-
min_ = min(
|
86 |
-
max_ = min(
|
87 |
summarized_text = ''.join(model(text_to_summarize, min_length=min_,max_length=max_)) if summary_type == "Extractive" else model(text_to_summarize, min_length=min_,max_length=max_)[0]['summary_text']
|
88 |
|
89 |
st.subheader("Original text")
|
|
|
3 |
from fake_useragent import UserAgent
|
4 |
from bs4 import BeautifulSoup
|
5 |
from transformers import pipeline
|
6 |
+
import re
|
7 |
#from newsplease import NewsPlease
|
8 |
import validators
|
9 |
import streamlit as st
|
|
|
22 |
return text
|
23 |
|
24 |
|
25 |
+
def preprocess_text(x):
    """Normalize raw text into plain ASCII words and basic punctuation.

    The steps run in this order:

    1. Unicode compatibility decomposition (NFKD) followed by removal of
       combining marks, so accented letters keep their base letter and
       characters such as the no-break space or the ellipsis turn into
       their ASCII equivalents instead of disappearing.
    2. Removal of ``http://`` / ``https://`` URLs.
    3. Removal of ``@mentions`` and ``#hashtags`` (up to the next whitespace).
    4. Replacement of every run of characters other than ASCII letters,
       digits and ``. , ! ?`` with a single space.

    Args:
        x: The text to clean, as a ``str``.

    Returns:
        The cleaned text. It contains only ASCII letters, digits, ``.,!?``
        and single spaces; leading/trailing spaces are not stripped.
    """
    # Local import so this function does not depend on edits to the
    # module-level import block.
    import unicodedata

    # Fold to ASCII-friendly forms first. Simply dropping non-ASCII bytes
    # would glue words together ("hello\u00a0world" -> "helloworld") and
    # truncate accented words ("caf\u00e9" -> "caf").
    x = unicodedata.normalize("NFKD", x)
    x = "".join(ch for ch in x if not unicodedata.combining(ch))

    # URLs: require the scheme separator so ordinary words that merely
    # start with "http" (e.g. "httpd") are kept.
    x = re.sub(r"https?://\S+", " ", x)
    x = re.sub(r"@\S+", " ", x)  # mentions
    x = re.sub(r"#\S+", " ", x)  # hashtags

    # Everything except .,!? and ASCII alphanumerics becomes one space.
    # Whitespace is part of the negated class, so this also collapses
    # repeated spaces/newlines and any remaining non-ASCII characters.
    x = re.sub(r"[^.,!?A-Za-z0-9]+", " ", x)

    return x
|
34 |
|
35 |
@st.cache(allow_output_mutation=True)
|
36 |
def extractive_model():
|
|
|
86 |
summarize = st.button("Summarize")
|
87 |
|
88 |
if summarize:
|
89 |
+
text_to_summarize = preprocess_text(clean_text) if is_url else preprocess_text(plain_text)
|
90 |
|
91 |
with st.spinner(text="Loading Model and creating summary. This might take a few seconds depending on the length of your text..."):
|
92 |
model = model()
|
93 |
#summarized_text = text_to_summarize if len(text_to_summarize) > 60 else ''.join(model(body, min_length=60))
|
94 |
+
# Length bounds handed to the summarizer: a fifth and a half of the cleaned
# text's character count, capped at 100 and 600 respectively. Floor division
# keeps both values integers; true division produced floats (e.g. 57.4),
# which integer length parameters are not guaranteed to accept.
min_ = min(100, len(text_to_summarize) // 5)
max_ = min(600, len(text_to_summarize) // 2)
|
96 |
summarized_text = ''.join(model(text_to_summarize, min_length=min_,max_length=max_)) if summary_type == "Extractive" else model(text_to_summarize, min_length=min_,max_length=max_)[0]['summary_text']
|
97 |
|
98 |
st.subheader("Original text")
|