File size: 1,587 Bytes
4c8fe65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import logging
import sys
import os
from tweepy import API, OAuthHandler, Cursor
import json
from typing import List
from api.tokenize_tweets import normalize_tweets

# Configure the root logger once at import time: timestamped records,
# everything from DEBUG upwards.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s: %(levelname)s:%(message)s',
)


def get_api(keys_file: str) -> "API":
    """Build an authenticated tweepy ``API`` client from a JSON credentials file.

    The file must contain a JSON object with non-empty ``CONSUMER_KEY``,
    ``CONSUMER_SECRET``, ``ACCESS_TOKEN`` and ``ACCESS_SECRET`` entries.

    Args:
        keys_file: Path to the JSON credentials file.

    Returns:
        A client created with ``wait_on_rate_limit=True``.

    Raises:
        SystemExit: If the file does not exist, or if any of the four
            credentials is missing or empty.
    """
    if not os.path.exists(keys_file):
        logging.error("File %s does not exist in the specified path.", keys_file)
        sys.exit(1)

    with open(keys_file, encoding="utf-8") as f:
        key = json.load(f)

    # .get() so that an absent entry is reported through the same
    # "check your keys" path as an empty one instead of a bare KeyError.
    consumer_key = key.get("CONSUMER_KEY")
    consumer_secret = key.get("CONSUMER_SECRET")
    access_token = key.get("ACCESS_TOKEN")
    access_token_secret = key.get("ACCESS_SECRET")

    # All four credentials are required (the secret must be checked too).
    if not all([consumer_key, consumer_secret, access_token, access_token_secret]):
        logging.error("Please check twitter key entries and try again")
        sys.exit(1)

    # connect to twitter
    auth = OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return API(auth, wait_on_rate_limit=True)


def download_tweets(api, keywords, max: int):
    """Search Twitter for each comma-separated keyword and collect the results.

    Every keyword is stripped of surrounding whitespace; blank entries (empty
    input, trailing or doubled commas) are skipped so that no filter-only
    query is sent.

    Args:
        api: Client object exposing ``search_tweets`` (as returned by
            ``get_api``).
        keywords: Comma-separated search terms, e.g. ``"python, rust"``.
        max: Passed to ``Cursor.items`` as the item limit for each keyword.

    Returns:
        A list with the statuses found for all keywords, in keyword order.
    """
    tweets = []
    for raw_keyword in keywords.split(','):
        keyword = raw_keyword.strip()
        if not keyword:
            # Nothing to search for; the query would contain only the filters.
            continue
        logging.info("Searching '%s'", keyword)
        query = keyword + " -RT -is:retweet lang:en"
        tweets.extend(
            Cursor(api.search_tweets, q=query, tweet_mode="extended").items(max)
        )
    return tweets


def use(keywords, n_tweets, keys_file='api/resources/twitter_keys.json'):
    """Download tweets for the given keywords and return them normalized.

    Args:
        keywords: Comma-separated search terms forwarded to
            ``download_tweets``.
        n_tweets: Item limit forwarded to ``download_tweets`` for each keyword.
        keys_file: Path to the JSON credentials file read by ``get_api``.
            The default is relative to the current working directory.

    Returns:
        Whatever ``normalize_tweets`` returns for the downloaded statuses.
    """
    api = get_api(keys_file)
    tweets = download_tweets(api, keywords, n_tweets)
    return normalize_tweets(tweets)