Spaces:

lalithadevi
/

latest_news_backend_with_cat_pred_similar_news_

Running

lalithadevi committed on Jun 19, 2024

Commit

1843dc6

verified ·

1 Parent(s): 1f1a45a

Update news_extractor/news_extractor.py

Files changed (1) hide show

news_extractor/news_extractor.py CHANGED Viewed

@@ -40,10 +40,10 @@ def rss_parser(i):
     """
     try:
         b1 = BeautifulSoup(str(i), "xml")
-        title = "" if b1.find("title") is None else b1.find("title").get_text()
         title = text_clean(title)
-        url = "" if b1.find("link") is None else b1.find("link").get_text()
-        desc = "" if b1.find("description") is None else b1.find("description").get_text()
         desc = text_clean(desc)
         desc = f'{desc[:300]}...' if len(desc) >= 300 else desc
         date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()

     """
     try:
         b1 = BeautifulSoup(str(i), "xml")
+        title = "" if b1.find("title") is None else b1.find("title").get_text().strip()
         title = text_clean(title)
+        url = "" if b1.find("link") is None else b1.find("link").get_text().strip()
+        desc = "" if b1.find("description") is None else b1.find("description").get_text().strip()
         desc = text_clean(desc)
         desc = f'{desc[:300]}...' if len(desc) >= 300 else desc
         date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()