news-api / news_data.py
radames's picture
Update news_data.py
ce7119a verified
import feedparser
from pathlib import Path
import json
from db import Database
from apscheduler.schedulers.background import BackgroundScheduler
from datetime import datetime
# Module-level APScheduler background scheduler; the cache_news job is
# registered on it further down in this module.
scheduler = BackgroundScheduler()
# Project-local Database object (imported from the `db` module), constructed
# with the relative path "./".
# NOTE(review): what Database does with this path is not visible in this
# file — confirm against db.
database = Database(Path("./"))
def get_feed(feed_url):
    """Parse the feed at ``feed_url`` and return only its entries.

    Args:
        feed_url: Feed location, handed unchanged to ``feedparser.parse``.

    Returns:
        A dict with the single key ``"entries"`` whose value is the
        ``"entries"`` item of the parsed result.
    """
    parsed = feedparser.parse(feed_url)
    entries = parsed["entries"]
    return {"entries": entries}
def cache_news():
    """Fetch every feed listed in ``TOP_NEWS_FEEDS`` and store the results.

    Each feed is fetched with ``get_feed`` and serialized with
    ``json.dumps``. A feed for which either step raises is reported on
    stdout and skipped. All collected ``(url, label, json_text)`` tuples are
    handed to ``database.insert`` in one call after the loop.
    """
    print("Caching news")
    rows = []
    for source in TOP_NEWS_FEEDS:
        url, label = source["url"], source["label"]
        print(f"Fetching {label} from {url}")
        try:
            payload = json.dumps(get_feed(url))
        except Exception as e:
            # Best effort: one failing feed must not abort the whole run.
            print(f"Failed to fetch {label} from {url}: {e}")
            continue
        rows.append((url, label, payload))
    database.insert(rows)
# Register cache_news as an hourly interval job under the id "cache_news".
# next_run_time is set to the moment this module is imported, so the first
# run is already due once the scheduler is started (the scheduler is not
# started in the visible part of this file).
scheduler.add_job(
    cache_news,
    trigger="interval",
    hours=1,
    id="cache_news",
    next_run_time=datetime.now(),
)
# Feeds polled by cache_news. Every item is a dict with the keys "label" and
# "url" (in that order). Labels are not unique, and the URL
# http://rss.cnn.com/rss/cnn_topstories.rss is listed twice (as
# "CNN Top Stories" and as "CNN US").
TOP_NEWS_FEEDS = [
    {"label": label, "url": url}
    for label, url in (
        ("BBC World News", "http://feeds.bbci.co.uk/news/world/rss.xml"),
        ("Reddit World News", "https://www.reddit.com/r/worldnews/top/.rss"),
        ("Vox", "http://www.vox.com/rss/index.xml"),
        ("CBS News", "https://www.cbsnews.com/latest/rss/main"),
        ("ABC News", "http://abcnews.go.com/abcnews/topstories"),
        ("CNN Top Stories", "http://rss.cnn.com/rss/cnn_topstories.rss"),
        ("CNN World News", "http://rss.cnn.com/rss/cnn_world.rss"),
        (
            "The New York Times",
            "http://www.nytimes.com/services/xml/rss/nyt/HomePage.xml",
        ),
        (
            "The Economist",
            "http://www.economist.com/sections/business-finance/rss.xml",
        ),
        ("The Guardian", "https://www.theguardian.com/international/rss"),
        ("NPR", "http://www.npr.org/rss/rss.php?id=1001"),
        ("Al Jazeera", "https://www.aljazeera.com/xml/rss/all.xml"),
        ("The Guardian World News", "https://www.theguardian.com/world/rss"),
        ("The Atlantic", "https://www.theatlantic.com/feed/all/"),
        ("Vice", "http://www.vice.com/rss"),
        (
            "The New York Times",
            "https://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
        ),
        (
            "The New Yorker",
            "http://www.newyorker.com/services/rss/feeds/everything.xml",
        ),
        ("Pew Research Center", "http://www.pewresearch.org/feed/"),
        ("Fox News", "http://feeds.feedburner.com/foxnews/latest"),
        ("The Washington Post", "http://feeds.washingtonpost.com/rss/world"),
        ("The Guardian UK", "https://www.theguardian.com/uk/rss"),
        ("TIME", "http://rss.time.com/web/time/rss/top/index.xml"),
        (
            "The New York Times",
            "http://rss.nytimes.com/services/xml/rss/nyt/HomePage.xml",
        ),
        ("NPR", "https://feeds.npr.org/1001/rss.xml"),
        ("Fortune", "http://fortune.com/feed/"),
        ("Fox News", "http://feeds.foxnews.com/foxnews/latest"),
        (
            "BBC World News",
            "http://newsrss.bbc.co.uk/rss/newsonline_world_edition/front_page/rss.xml",
        ),
        ("Al Jazeera", "http://www.aljazeera.com/xml/rss/all.xml"),
        ("Le Monde", "https://www.lemonde.fr/rss/une.xml"),
        ("Vox", "https://www.vox.com/rss/index.xml"),
        (
            "The New York Times",
            "http://rss.nytimes.com/services/xml/rss/nyt/World.xml",
        ),
        ("The Guardian US", "https://www.theguardian.com/us/rss"),
        ("ProPublica", "http://feeds.propublica.org/propublica/main"),
        ("The Washington Post", "https://feedx.net/rss/washingtonpost.xml"),
        ("Axios", "https://api.axios.com/feed/top/"),
        ("RT", "https://www.rt.com/rss/"),
        ("ABC News US", "http://feeds.abcnews.com/abcnews/usheadlines"),
        ("CNN US", "http://rss.cnn.com/rss/cnn_topstories.rss"),
        ("CBS News", "http://www.cbsnews.com/latest/rss/main"),
        ("The Wall Street Journal", "http://online.wsj.com/xml/rss/3_7085.xml"),
        (
            "USA Today",
            "http://content.usatoday.com/marketing/rss/rsstrans.aspx?feedId=news2",
        ),
        ("The Christian Science Monitor", "http://rss.csmonitor.com/feeds/usa"),
        ("NBC News Top Stories", "http://feeds.nbcnews.com/feeds/topstories"),
        ("NBC News World News", "http://feeds.nbcnews.com/feeds/worldnews"),
        ("Reuters World News", "http://feeds.reuters.com/Reuters/worldNews"),
        ("Reuters US News", "http://feeds.reuters.com/Reuters/domesticNews"),
        (
            "Associated Press US Headlines",
            "http://hosted.ap.org/lineups/USHEADS.rss",
        ),
        (
            "Associated Press World Headlines",
            "http://hosted.ap.org/lineups/WORLDHEADS.rss",
        ),
        (
            "HuffPost World News",
            "http://www.huffingtonpost.com/feeds/verticals/world/index.xml",
        ),
        (
            "BBC News US and Canada",
            "http://feeds.bbci.co.uk/news/world/us_and_canada/rss.xml",
        ),
        ("Yahoo News US", "http://news.yahoo.com/rss/us"),
        ("Yahoo News World", "http://rss.news.yahoo.com/rss/world"),
        ("Newsweek", "http://www.newsweek.com/rss"),
        ("The Daily Beast", "http://feeds.feedburner.com/thedailybeast/articles"),
        ("Quartz", "http://qz.com/feed"),
        ("The Guardian USA", "http://www.theguardian.com/world/usa/rss"),
        ("Politico", "http://www.politico.com/rss/politicopicks.xml"),
        ("The New Yorker News", "http://www.newyorker.com/feed/news"),
        ("PBS NewsHour", "http://feeds.feedburner.com/NationPBSNewsHour"),
        ("PBS NewsHour World", "http://feeds.feedburner.com/NewshourWorld"),
        ("NPR Politics", "http://www.npr.org/rss/rss.php?id=1003"),
        ("NPR World", "http://www.npr.org/rss/rss.php?id=1004"),
        ("The Atlantic National", "http://feeds.feedburner.com/AtlanticNational"),
        ("The Atlantic Wire", "http://feeds.feedburner.com/TheAtlanticWire"),
        ("Los Angeles Times US", "http://www.latimes.com/nation/rss2.0.xml"),
        ("Los Angeles Times World", "http://www.latimes.com/world/rss2.0.xml"),
        ("Breaking News", "http://api.breakingnews.com/api/v1/item/?format=rss"),
        ("VICE News", "https://news.vice.com/rss"),
        ("Talking Points Memo", "http://talkingpointsmemo.com/feed/livewire"),
        ("TIME Newsfeed", "http://time.com/newsfeed/feed/"),
        ("Fox News", "http://feeds.foxnews.com/foxnews/latest?format=xml"),
        ("Mashable US & World", "http://mashable.com/us-world/rss/"),
    )
]