yassTrad committed on
Commit
be2791c
1 Parent(s): e1a823f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -14,9 +14,9 @@ g = Goose()
14
  def article_text_extractor(url: str):
15
  '''Extract text from url'''
16
  paper = g.extract(url=url)
17
- #first_sentence = list(filter(None, paper.cleaned_text.split("\n")))[0]
18
- text = paper.cleaned_text if "reuters" not in url else " ".join(list(filter(None, paper.cleaned_text.split("\n")))[:-1])
19
- #text = text if "REUTERS" not in first_sentence else " ".join(list(filter(None, text.split("\n")))[1:])
20
  return text
21
 
22
  def article_text_extractor_(url: str):
 
14
  def article_text_extractor(url: str):
15
  '''Extract text from url'''
16
  paper = g.extract(url=url)
17
+ first_sentence = list(filter(None, paper.cleaned_text.split("\n")))[0]
18
+ text = paper.cleaned_text if "reuters" not in url else "\n".join(list(filter(None, paper.cleaned_text.split("\n")))[:-1])
19
+ text = text if "REUTERS" not in first_sentence else "\n".join(list(filter(None, text.split("\n")))[1:])
20
  return text
21
 
22
  def article_text_extractor_(url: str):