# CEC-Learning / News.py
# Jeff Myers II
# Only General and Technology categories.
# 7ba0657
from newsapi import NewsApiClient
from newspaper import Article
import os
# Names this module intends to expose.
# NOTE(review): Python's star-import machinery reads ``__all__``; ``__export__``
# is only an ordinary module attribute -- confirm whether ``__all__`` was meant.
__export__ = ["News"]
class News:
    """Fetch headlines through NewsAPI and attach the full article text.

    The NewsAPI client is stored on ``self.newsapi``. Headline dictionaries
    returned by the public methods are the NewsAPI article records with the
    ``author`` key removed, ``source`` flattened to the source name, and
    ``content`` replaced by the text extracted with ``newspaper.Article``.
    """

    # Source names (as reported by NewsAPI) that are never queried.
    __EX_SOURCES__ = [
        "ABC News",
        "Bloomberg",
        "The Hill",
        "Fox Sports",
        "Google News",
        "Newsweek",
        "Politico",
    ]

    # Categories currently enabled; the commented-out entries are disabled.
    __CATEGORIES__ = [
        "General",
        # "Business",
        # "Entertainment",
        # "Health",
        # "Science",
        "Technology",
    ]

    def __init__(self):
        """Build the NewsAPI client from the ``NEWS_API_KEY`` environment variable."""
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def _fetch_sources(self, category=None):
        """Return the English/US source records that are not excluded.

        Args:
            category: Optional category name; it is lower-cased before being
                sent. ``None`` (or any falsy value) is passed through as is.

        Returns:
            A list of the source dictionaries from the API response whose
            ``name`` is not listed in ``__EX_SOURCES__``.
        """
        response = self.newsapi.get_sources(
            language="en",
            country="us",
            category=category.lower() if category else category,
        )
        return [
            source
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        ]

    def _source_ids(self, category=None):
        """Return the allowed sources as a comma-separated string of ids.

        The ``sources`` request parameter is documented by NewsAPI as a
        comma-separated list of source identifiers, so ids (not display
        names) are sent, without padding spaces. Sorting keeps the request
        deterministic.
        """
        ids = {
            source["id"]
            for source in self._fetch_sources(category)
            if source.get("id")
        }
        return ",".join(sorted(ids))

    def get_sources(self, category=None):
        """Return the set of allowed source names.

        Args:
            category: Optional category name used to narrow the sources.

        Returns:
            A set of source display names, excluding ``__EX_SOURCES__``.
        """
        return {source["name"] for source in self._fetch_sources(category)}

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines from the allowed sources, with article text.

        Args:
            num_headlines: Page size requested from the API.
            category: Optional category name used to select the sources.

        Returns:
            The list of article dictionaries, processed by
            ``_get_articles_from_headlines``.
        """
        headlines = self.newsapi.get_top_headlines(
            sources=self._source_ids(category),
            page_size=num_headlines,
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles from the allowed sources, with article text.

        Args:
            num_headlines: Page size requested from the API.
            query: Search expression forwarded as the ``q`` parameter.

        Returns:
            The list of article dictionaries, processed by
            ``_get_articles_from_headlines``.
        """
        headlines = self.newsapi.get_everything(
            q=query,
            sources=self._source_ids(),
            page_size=num_headlines,
            # The keyword was previously misspelled and rejected by the client.
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Normalise article records in place and attach the full text.

        For each record: drop ``author`` (if present), replace the nested
        ``source`` dictionary with its ``name``, then download and parse the
        page at ``url`` and store the extracted text under ``content``.

        Args:
            headlines: List of article dictionaries from a NewsAPI response.

        Returns:
            The same list object, with every record modified in place.

        Note:
            Download/parse errors are not caught here, so a failure on one
            article propagates to the caller.
        """
        for headline in headlines:
            # Tolerate records that carry no "author" key at all.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text
        return headlines