aus10powell committed on
Commit
0fabb50
1 Parent(s): e442e8d

Update scripts/twitter_scraper.py

Browse files
Files changed (1) hide show
  1. scripts/twitter_scraper.py +58 -40
scripts/twitter_scraper.py CHANGED
@@ -5,48 +5,66 @@ from tqdm import tqdm
5
  import requests
6
  from scripts import sentiment
7
 
 
 
 
 
 
8
  def get_latest_account_tweets(handle):
9
- import tweepy
10
-
11
- import configparser
12
-
13
- import os
14
- if os.path.exists("tweepy_auth.ini"):
15
- config = configparser.ConfigParser()
16
- config.read("tweepy_auth.ini")
17
- # Get the authentication details
18
- authentication_section = config['AUTHENTICATION']
19
- consumer_key = authentication_section["twitter_consumer_key"]
20
- consumer_secret = authentication_section["twitter_consumer_secret"]
21
- access_token = authentication_section["twitter_access_token"]
22
- access_token_secret = authentication_section["twitter_access_token_secret"]
23
- else:
24
- consumer_key = os.environ['twitter_consumer_key']
25
- consumer_secret = os.environ["twitter_consumer_secret"]
26
- access_token = os.environ["twitter_access_token"]
27
- access_token_secret = os.environ["twitter_access_token_secret"]
28
-
29
- auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
30
- auth.set_access_token(access_token, access_token_secret)
31
-
32
- # create the API object
33
- api = tweepy.API(auth)
34
-
35
- # load the tweets from a specific user
36
- tweets = api.user_timeline(
37
- screen_name=handle, count=10000000, tweet_mode="extended"
38
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- df_tweets = pd.DataFrame(data=[t._json for t in tweets])
41
- df_tweets["created_at"] = pd.DataFrame(df_tweets["created_at"])
42
- df_tweets = df_tweets.sort_values("created_at")
43
- # print the tweet texts
44
- tweets_txt = []
45
- for tweet in tweets:
46
- tweets_txt.append(sentiment.tweet_cleaner(tweet.full_text))
47
- df_tweets["clean_text"] = tweets_txt
48
- df_tweets["handle"] = df_tweets.user.iloc[0]["screen_name"]
49
- return df_tweets
50
 
51
  def get_tweets(
52
  query: str,
 
5
  import requests
6
  from scripts import sentiment
7
 
8
+ import tweepy
9
+ import configparser
10
+ import os
11
+ import pandas as pd
12
+
def _load_twitter_credentials(config_path="tweepy_auth.ini"):
    """Return the four Twitter OAuth 1 credentials as a tuple.

    The values are read from the ``AUTHENTICATION`` section of *config_path*
    when that file exists, otherwise from environment variables of the same
    names.

    Returns:
        ``(consumer_key, consumer_secret, access_token, access_token_secret)``

    Raises:
        KeyError: if the section or any of the four settings is missing.
    """
    setting_names = (
        "twitter_consumer_key",
        "twitter_consumer_secret",
        "twitter_access_token",
        "twitter_access_token_secret",
    )
    if os.path.exists(config_path):
        config = configparser.ConfigParser()
        config.read(config_path)
        source = config["AUTHENTICATION"]
    else:
        source = os.environ
    return tuple(source[name] for name in setting_names)


def _tweepy_error_types():
    """Return the tweepy base exception classes present in the installed version.

    tweepy 4 renamed ``TweepError`` to ``TweepyException``; looking both up by
    name keeps the ``except`` clause valid on either side of that rename.
    """
    candidates = ("TweepyException", "TweepError")
    return tuple(getattr(tweepy, name) for name in candidates if hasattr(tweepy, name))


def _twitter_error_codes(error):
    """Collect the Twitter API error codes carried by a tweepy exception.

    Reads both ``api_codes`` (a list, tweepy >= 4) and ``api_code`` (a single
    value, tweepy < 4); either attribute may be absent or ``None``.
    """
    codes = set(getattr(error, "api_codes", None) or ())
    single_code = getattr(error, "api_code", None)
    if single_code is not None:
        codes.add(single_code)
    return codes


def _tweets_to_frame(tweets):
    """Turn a non-empty sequence of tweepy statuses into a DataFrame.

    One row per tweet is built from ``tweet._json``. ``created_at`` is parsed
    to datetimes, ``clean_text`` holds ``sentiment.tweet_cleaner`` applied to
    each tweet's ``full_text``, and ``handle`` is the screen name of the first
    row's user. Rows are returned sorted by ``created_at`` ascending.
    """
    df_tweets = pd.DataFrame(data=[t._json for t in tweets])
    df_tweets["created_at"] = pd.to_datetime(df_tweets["created_at"])
    # Attach the cleaned text BEFORE sorting: a plain list is assigned by
    # position, so adding it after the sort would pair texts with the wrong rows.
    df_tweets["clean_text"] = [
        sentiment.tweet_cleaner(tweet.full_text) for tweet in tweets
    ]
    df_tweets = df_tweets.sort_values("created_at")
    df_tweets["handle"] = df_tweets.user.iloc[0]["screen_name"]
    return df_tweets


def get_latest_account_tweets(handle):
    """Fetch the most recent tweets of an account as a DataFrame.

    Credentials are taken from ``tweepy_auth.ini`` if it exists in the working
    directory, otherwise from environment variables. Failures are reported on
    stdout and never raised to the caller.

    Args:
        handle: Screen name of the account whose timeline is requested.

    Returns:
        A DataFrame with one row per tweet, sorted by ``created_at``
        ascending, including ``clean_text`` and ``handle`` columns; ``None``
        if credentials are missing, the API call fails, or no tweets come back.
    """
    try:
        (
            consumer_key,
            consumer_secret,
            access_token,
            access_token_secret,
        ) = _load_twitter_credentials()
    except Exception as e:
        print("An error occurred:", str(e))
        return None

    # Resolved before the try so that evaluating the except clause can never
    # fail itself and mask the original error.
    api_errors = _tweepy_error_types()

    try:
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)

        # create the API object
        api = tweepy.API(auth)

        # load the tweets from a specific user
        # NOTE(review): the endpoint presumably caps `count` per request, so
        # this likely returns a single page only - confirm, and paginate if the
        # full timeline is needed.
        tweets = api.user_timeline(
            screen_name=handle, count=10000000, tweet_mode="extended"
        )

        if not tweets:
            print("No tweets returned for handle:", handle)
            return None

        return _tweets_to_frame(tweets)

    except api_errors as e:
        # Handle specific error conditions
        codes = _twitter_error_codes(e)
        if 63 in codes:
            print("User has been suspended.")
        elif 88 in codes:
            print("Rate limit exceeded. Please try again later.")
        else:
            print("Error occurred during API call:", str(e))

    except Exception as e:
        print("An error occurred:", str(e))

    return None
67
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  def get_tweets(
70
  query: str,