"""Helpers for fetching tweets, replies and account details from Twitter.

Most helpers use ``snscrape``; ``get_latest_account_tweets`` uses the
authenticated ``tweepy`` client instead.
"""

import datetime as dt
import itertools

import pandas as pd
import requests
import snscrape.modules.twitter as sntwitter
from tqdm import tqdm

from scripts import sentiment

# Layout of the ``created_at`` strings found in the v1.1 tweet JSON,
# e.g. "Wed Oct 10 20:19:24 +0000 2018".
_TWITTER_DATE_FORMAT = "%a %b %d %H:%M:%S %z %Y"

# Number of tweets requested from ``user_timeline``; 200 is the documented
# per-request maximum for that endpoint.
_USER_TIMELINE_COUNT = 200


def get_latest_account_tweets(handle):
    """
    Fetches the most recent tweets of an account through the tweepy API.

    Credentials are read from the ``AUTHENTICATION`` section of
    ``tweepy_auth.ini`` in the current working directory.

    Args:
        handle (str): Screen name of the account whose timeline is loaded.

    Returns:
        A pandas DataFrame with one row per tweet (the raw tweet JSON as
        columns), sorted chronologically by ``created_at``, plus a
        ``clean_text`` column (output of ``sentiment.tweet_cleaner``) and a
        ``handle`` column. If the timeline is empty, an empty DataFrame
        without those extra columns is returned.
    """
    # Imported locally so that importing this module does not require tweepy.
    import configparser

    import tweepy

    config = configparser.ConfigParser()
    config.read("tweepy_auth.ini")

    # Get the authentication details
    authentication_section = config["AUTHENTICATION"]
    consumer_key = authentication_section["twitter_consumer_key"]
    consumer_secret = authentication_section["twitter_consumer_secret"]
    access_token = authentication_section["twitter_access_token"]
    access_token_secret = authentication_section["twitter_access_token_secret"]

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)

    # Create the API object
    api = tweepy.API(auth)

    # Load the tweets from a specific user
    tweets = api.user_timeline(
        screen_name=handle, count=_USER_TIMELINE_COUNT, tweet_mode="extended"
    )

    df_tweets = pd.DataFrame(data=[t._json for t in tweets])
    if df_tweets.empty:
        # Nothing to clean or sort; the column accesses below would fail.
        return df_tweets

    # Attach the cleaned text BEFORE sorting: a plain list is assigned
    # positionally, so it must still be in the same order as the rows.
    df_tweets["clean_text"] = [
        sentiment.tweet_cleaner(tweet.full_text) for tweet in tweets
    ]

    # ``created_at`` holds strings starting with the weekday name, so a plain
    # sort would be lexicographic. Sort on the parsed timestamps while leaving
    # the stored column values untouched.
    df_tweets = df_tweets.sort_values(
        "created_at",
        key=lambda col: pd.to_datetime(col, format=_TWITTER_DATE_FORMAT),
    )

    df_tweets["handle"] = df_tweets.user.iloc[0]["screen_name"]
    return df_tweets


def get_tweets(
    query: str,
) -> list:
    """
    Fetches tweets from Twitter based on a given query and returns a list of
    extracted tweet information.

    Args:
        query (str): The query to search for tweets on Twitter.

    Returns:
        A list of dictionaries as produced by ``extract_tweet_info``, one per
        tweet yielded by the search scraper. The whole result set is consumed.
    """
    print(f"Fetching tweets with query: {query}")
    fetched_tweets = sntwitter.TwitterSearchScraper(query).get_items()
    return [extract_tweet_info(tweet) for tweet in tqdm(fetched_tweets)]


def get_replies(username: str, conversation_id: str, max_tweets: int) -> list:
    """
    Fetches tweets addressed to a given user since a given tweet and returns a
    list of extracted tweet information.

    The search query used is
    ``to:{username} since_id:{conversation_id} filter:safe``.

    Args:
        username (str): The username of the Twitter user whose replies are to
            be fetched.
        conversation_id (str): The ID of the conversation (tweet) used as the
            ``since_id`` lower bound of the search.
        max_tweets (int): The maximum number of tweets to return. Values of
            zero or less yield an empty list.

    Returns:
        A list of at most ``max_tweets`` dictionaries as produced by
        ``extract_tweet_info``.
    """
    print(
        f"Fetching replies for username {username} and conversation {conversation_id}"
    )
    query = f"to:{username} since_id:{conversation_id} filter:safe"
    found = sntwitter.TwitterSearchScraper(query).get_items()
    # islice stops after exactly ``max_tweets`` items and does not pull an
    # extra item from the scraper; it rejects negative bounds, hence max().
    limited = itertools.islice(found, max(max_tweets, 0))
    return [extract_tweet_info(tweet) for tweet in tqdm(limited)]


def get_tweet_by_id_and_username(username: str, tweet_id: str):
    """
    Runs a Twitter search for the URL of a tweet built from the given username
    and tweet ID.

    Args:
        username (str): The username of the Twitter user who posted the tweet.
        tweet_id (str): The ID of the tweet to look up.

    Returns:
        The iterator returned by ``TwitterSearchScraper(...).get_items()`` for
        the query ``https://twitter.com/{username}/status/{tweet_id}``. It is
        not consumed here and the items are not passed through
        ``extract_tweet_info``.
    """
    # NOTE(review): this searches for the tweet URL as text rather than
    # looking the tweet up by ID, so it may not yield the tweet itself.
    # Confirm whether a by-ID scraper was intended.
    tweet_url = f"https://twitter.com/{username}/status/{tweet_id}"
    return sntwitter.TwitterSearchScraper(tweet_url).get_items()


def extract_tweet_info(tweet):
    """
    Extracts relevant information from a tweet object and returns a dictionary
    with the extracted values.

    Args:
        tweet: A tweet object exposing the attributes read below (presumably
            an snscrape tweet item; verify against callers).

    Returns:
        A dictionary with the keys ``date``, ``username``, ``content``,
        ``retweet_count``, ``tweet_id``, ``like_count``, ``reply_count``,
        ``in_reply_to_tweet_id``, ``conversation_id`` and ``view_count``.
    """
    return {
        "date": tweet.date,
        "username": tweet.user.username,
        "content": tweet.rawContent,
        "retweet_count": tweet.retweetCount,
        "tweet_id": tweet.id,
        "like_count": tweet.likeCount,
        "reply_count": tweet.replyCount,
        "in_reply_to_tweet_id": tweet.inReplyToTweetId,
        "conversation_id": tweet.conversationId,
        "view_count": tweet.viewCount,
    }


def get_follower_ids(
    username: str,
    limit: int = 20,
    start_date: dt.date = dt.date(year=2023, month=3, day=10),
    end_date: dt.date = dt.date(year=2023, month=3, day=22),
):
    """
    Picks one tweet of an account within a date window and fetches tweets
    sent to that account since it.

    Despite its name, this function does not return follower IDs: it returns
    one of the account's tweets together with the result of ``get_replies``
    for that tweet.

    Args:
        username (str): The Twitter handle whose tweets are searched.
        limit (int): Currently unused; kept for interface compatibility.
        start_date (datetime.date): Inclusive ``since:`` bound of the search.
        end_date (datetime.date): ``until:`` bound of the search.

    Returns:
        A tuple ``(one_tweet, replies)`` where ``one_tweet`` is the last
        tweet dictionary returned by the search and ``replies`` is a list of
        up to 1000 tweet dictionaries from ``get_replies``.

    Raises:
        IndexError: If the account has no tweets in the date window.
    """
    query = f"from:{username} since:{start_date} until:{end_date}"
    tweets = get_tweets(query=query)
    if not tweets:
        raise IndexError(
            f"No tweets found for {username} between {start_date} and {end_date}"
        )

    one_tweet = tweets[-1]
    one_tweet_id = one_tweet["tweet_id"]
    replies = get_replies(
        username=username, conversation_id=one_tweet_id, max_tweets=1000
    )
    return one_tweet, replies


def get_twitter_account_info(twitter_handle: str) -> dict:
    """
    Extracts the name, username, user ID, follower count, friends count and
    verified flag of a Twitter user using snscrape.

    Args:
        twitter_handle (str): The Twitter username to retrieve information for.

    Returns:
        dict: A dictionary with the keys ``name``, ``username``, ``user_id``,
        ``follower_count``, ``friends_count`` and ``verified``. ``verified``
        is the string ``"false"`` when the profile value stringifies to
        "false" (case-insensitive) and ``"true"`` for anything else.
    """
    # Create a TwitterUserScraper object
    user_scraper = sntwitter.TwitterUserScraper(twitter_handle)

    # Get the user's profile information
    user_profile = user_scraper.entity

    # NOTE(review): any value other than a "false"-like one (including None)
    # is reported as "true"; confirm this is the intended mapping.
    if str(user_profile.verified).lower() == "false":
        verified = "false"
    else:
        verified = "true"

    return {
        "name": user_profile.displayname,
        "username": user_profile.username,
        "user_id": user_profile.id,
        "follower_count": user_profile.followersCount,
        "friends_count": user_profile.friendsCount,
        "verified": verified,
    }


def main() -> None:
    """Manual smoke test: fetch an account's tweets and a reply thread."""
    ## Testing extracting tweets from an account
    # Set the search variables (dates for when the account tweeted; replies
    # are not taken into account)
    account = "taylorlorenz"
    start_date = dt.date(year=2023, month=2, day=1)
    end_date = dt.date(year=2023, month=3, day=11)

    # Format the query string
    query = f"from:{account} since:{start_date} until:{end_date}"
    print(f"query: {query}")

    tweets = get_tweets(query=query)
    df_tweets = pd.DataFrame(data=tweets)
    df_tweets = df_tweets.sort_values("in_reply_to_tweet_id")

    # Save output (comment out to skip writing the CSV)
    df_tweets.to_csv("df_tweets.csv")

    print(df_tweets.head(2))
    print(df_tweets.tail(2))
    print(f"Total Tweets: {len(tweets)}")

    ## Testing extracting conversation threads from a conversation ID
    conversation_id = (
        1620650202305798144  # A tweet from elon musk about turbulent times
    )
    max_tweets = 3000
    tweets = get_replies(
        username="elonmusk", conversation_id=conversation_id, max_tweets=max_tweets
    )
    df_replies = pd.DataFrame(data=tweets)

    # Uncomment to save output
    # df_replies.to_csv("df_replies.csv")

    print(
        f"Number of extracted tweets from conversation_id: {conversation_id}, {len(tweets)}"
    )


if __name__ == "__main__":
    main()