"""Download tweets matching keywords via tweepy and hand them to the tokenizer."""
import json
import logging
import os
import sys
from typing import List

from tweepy import API, OAuthHandler, Cursor

from api.tokenize_tweets import normalize_tweets

logging.basicConfig(format='%(asctime)s: %(levelname)s:%(message)s', level=logging.DEBUG)


def get_api(keys_file: str) -> API:
    """Build an authenticated tweepy ``API`` client from a JSON credentials file.

    The file must contain the entries ``CONSUMER_KEY``, ``CONSUMER_SECRET``,
    ``ACCESS_TOKEN`` and ``ACCESS_SECRET``.

    Args:
        keys_file: Path to the JSON file holding the Twitter credentials.

    Returns:
        A tweepy ``API`` object created with ``wait_on_rate_limit=True``.

    Raises:
        SystemExit: If the file does not exist, or if any of the four
            credential entries is missing or empty.
    """
    if not os.path.exists(keys_file):
        logging.info("File %s does not exist in the specified path.", keys_file)
        # NOTE(review): exits with status 0 although this is a failure path;
        # left unchanged so existing callers/scripts see the same exit code.
        sys.exit(0)

    with open(keys_file) as f:
        key = json.load(f)

    # .get() so that a missing entry is reported through the same
    # "check twitter key entries" path as an empty one, not a bare KeyError.
    consumer_key = key.get("CONSUMER_KEY")
    consumer_secret = key.get("CONSUMER_SECRET")
    access_token = key.get("ACCESS_TOKEN")
    access_token_secret = key.get("ACCESS_SECRET")

    # All four credentials must be present (the consumer secret included).
    if not all([consumer_key, consumer_secret, access_token, access_token_secret]):
        logging.info("Please check twitter key entries and try again")
        sys.exit(0)

    # connect to twitter
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return API(auth, wait_on_rate_limit=True)


def download_tweets(api, keywords, max: int):
    """Search Twitter for each comma-separated keyword and collect the results.

    Each keyword is stripped of surrounding whitespace; blank keywords (for
    example from a trailing comma) are skipped so that no filter-only query
    is sent.

    Args:
        api: Client exposing ``search_tweets`` (as returned by ``get_api``).
        keywords: Comma-separated search terms, e.g. ``"python,rust"``.
        max: Maximum number of statuses to fetch *per keyword*. The name
            shadows the builtin but is kept for caller compatibility.

    Returns:
        A list with the statuses of all keywords, in keyword order.
    """
    tweets = []
    for keyword in keywords.split(','):
        keyword = keyword.strip()
        if not keyword:
            continue
        logging.info("Searching '%s'", keyword)
        query = keyword + " -RT -is:retweet lang:en"
        cursor = Cursor(api.search_tweets, q=query, tweet_mode="extended")
        tweets.extend(cursor.items(max))
    return tweets


def use(keywords, n_tweets):
    """Fetch up to ``n_tweets`` tweets per keyword and normalize them.

    Credentials are read from ``api/resources/twitter_keys.json``.

    Args:
        keywords: Comma-separated search terms.
        n_tweets: Per-keyword limit passed on to ``download_tweets``.

    Returns:
        Whatever ``normalize_tweets`` returns for the downloaded statuses.
    """
    api = get_api('api/resources/twitter_keys.json')
    tweets = download_tweets(api, keywords, n_tweets)
    return normalize_tweets(tweets)