Spaces:
Running
Running
from newsapi import NewsApiClient | |
from newspaper import Article | |
import os | |
# Names this module intends to expose publicly.
# NOTE(review): Python itself only honors ``__all__`` for
# ``from module import *``; ``__export__`` has no built-in meaning, so it only
# matters if other project code reads it -- confirm whether ``__all__`` was
# intended.
__export__ = ["News"]
class News:
    """Fetch headlines through ``NewsApiClient`` and attach full article text.

    Headlines come from the NewsAPI client; each one is then downloaded and
    parsed with ``newspaper.Article`` so that its ``"content"`` field holds
    the extracted article text.
    """

    # Source names that are filtered out of every ``get_sources`` result.
    __EX_SOURCES__ = [
        "ABC News",
        "Bloomberg",
        "The Hill",
        "Fox Sports",
        "Google News",
        "Newsweek",
        "Politico",
    ]
    # Category names. Not referenced inside this class -- presumably read by
    # callers (TODO confirm). The commented-out entries are currently disabled.
    __CATEGORIES__ = [
        "General",
        # "Business",
        # "Entertainment",
        # "Health",
        # "Science",
        "Technology",
    ]

    def __init__(self):
        """Create the API client using the ``NEWS_API_KEY`` environment variable."""
        newsapi_key = os.environ.get("NEWS_API_KEY")
        self.newsapi = NewsApiClient(api_key=newsapi_key)

    def get_sources(self, category=None):
        """Return the names of English-language US sources, minus exclusions.

        Args:
            category: Optional category name; it is lower-cased before being
                sent to the API. A falsy value is passed through unchanged.

        Returns:
            A set of source names that are not listed in ``__EX_SOURCES__``.
        """
        normalized = category.lower() if category else category
        response = self.newsapi.get_sources(
            language="en",
            country="us",
            category=normalized,
        )
        return {
            source["name"]
            for source in response["sources"]
            if source["name"] not in self.__EX_SOURCES__
        }

    def _sources_param(self, category=None):
        """Build the ``sources`` string sent with headline requests."""
        # NOTE(review): NewsAPI documents ``sources`` as a comma-separated list
        # of source identifiers, whereas this sends display names joined with
        # ", " -- confirm the API accepts this form.
        return ", ".join(self.get_sources(category))

    def get_top_headlines(self, num_headlines=5, category=None):
        """Return top headlines with their full article text attached.

        Args:
            num_headlines: Value passed to the API as ``page_size``.
            category: Optional category used to select the sources
                (lower-cased by ``get_sources``).

        Returns:
            The list of article dicts from the API response, transformed in
            place by ``_get_articles_from_headlines``.
        """
        headlines = self.newsapi.get_top_headlines(
            sources=self._sources_param(category),
            page_size=num_headlines,
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def get_headlines(self, num_headlines=5, query=None):
        """Search all articles for ``query`` and attach full article text.

        Args:
            num_headlines: Value passed to the API as ``page_size``.
            query: Search string forwarded to the API as ``q``.

        Returns:
            The list of article dicts from the API response, transformed in
            place by ``_get_articles_from_headlines``.
        """
        headlines = self.newsapi.get_everything(
            q=query,
            sources=self._sources_param(),
            page_size=num_headlines,
            # Was misspelled ``lanuguage``, which the client rejects with a
            # TypeError (unexpected keyword argument).
            language="en",
        )["articles"]
        return self._get_articles_from_headlines(headlines)

    def _get_articles_from_headlines(self, headlines):
        """Flatten each headline dict and replace its content with full text.

        Each dict is modified in place: ``"author"`` is removed, ``"source"``
        is reduced to the source's name, and ``"content"`` is overwritten with
        the text extracted from the article at ``"url"``.

        Args:
            headlines: List of article dicts as returned by the API client.

        Returns:
            The same list object that was passed in.
        """
        for headline in headlines:
            # Tolerate entries that carry no "author" key at all.
            headline.pop("author", None)
            headline["source"] = headline["source"]["name"]
            article = Article(headline["url"])
            article.download()
            article.parse()
            headline["content"] = article.text
        return headlines