from tkinter import EXCEPTION
import twint
from datetime import date
"""
This class is a twitter scraper called TwitterScraper. It takes the user as input and collects the user's tweets
from 'from_date' to 'to_date'. If 'from_date' and 'to_date' are not specified, it collects the number of tweets 'num_tweets' from today.
It outputs a dictionary with the tweet unique id and some other information.
input: user, from_date, to_date, num_tweets
output: dict
"""
class scraper:
    """Collect tweets through twint and return them as a DataFrame.

    The date window and tweet limit are fixed at construction time; each
    ``scrape_*`` method sets the query on the shared ``twint.Config`` object
    (``self.conf``) and runs the search.
    """

    def __init__(self, from_date="2006-07-01", to_date=None, num_tweets=20):
        """Store the search window and create the twint configuration.

        Args:
            from_date: Lower bound of the search window, passed to twint as
                ``Since``.
            to_date: Upper bound of the search window, passed to twint as
                ``Until``. ``None`` (the default) means today's date.
            num_tweets: Value passed to twint as ``Limit``.
        """
        self.from_date = from_date
        # Resolve "today" per instance. A ``str(date.today())`` default in the
        # signature is evaluated only once, when the class is defined, and
        # would go stale in a long-running process.
        self.to_date = str(date.today()) if to_date is None else to_date
        self.num_tweets = num_tweets
        self.conf = twint.Config()

    def scrape_by_user(self, _user: str):
        """Return the tweets written by ``_user``."""
        # Clear any user filter left behind by scrape_by_user_and_string so
        # this query is not silently restricted to a previous user.
        self.conf.Username = None
        self.conf.Search = "from:@" + _user  # the search string is a username
        return self.__get_tweets_from_twint__()

    def scrape_by_string(self, _string: str):
        """Return the tweets matching the search string ``_string``."""
        # Same reset as in scrape_by_user: the config object is shared.
        self.conf.Username = None
        self.conf.Search = _string
        return self.__get_tweets_from_twint__()

    def scrape_by_user_and_string(self, _user: str, _string: str):
        """Return the tweets by ``_user`` matching the search string ``_string``."""
        self.conf.Username = _user
        self.conf.Search = _string
        return self.__get_tweets_from_twint__()

    @staticmethod
    def __get_only_tweets(tweet_and_replies):
        """Drop the rows whose tweet text starts with "@".

        A leading "@" is used here as the indicator of a reply or retweet.

        Args:
            tweet_and_replies: DataFrame with a ``"tweet"`` column.

        Returns:
            A new DataFrame without the rows whose ``"tweet"`` value starts
            with "@". The input frame is not modified.
        """
        # Build a positional keep-mask instead of collecting row labels: this
        # works whatever the frame's index looks like (non-default or
        # duplicated labels) and leaves non-string values in place.
        keep = [
            not (isinstance(text, str) and text.startswith("@"))
            for text in tweet_and_replies["tweet"]
        ]
        return tweet_and_replies.loc[keep]

    def __get_tweets_from_twint__(self):
        """Run the configured twint search and return the relevant columns.

        twint stores the result in ``twint.output.panda.Tweets_df``, a
        dataframe with the following columns:
            Index(['id', 'conversation_id', 'created_at', 'date', 'timezone', 'place',
            'tweet', 'language', 'hashtags', 'cashtags', 'user_id', 'user_id_str',
            'username', 'name', 'day', 'hour', 'link', 'urls', 'photos', 'video',
            'thumbnail', 'retweet', 'nlikes', 'nreplies', 'nretweets', 'quote_url',
            'search', 'near', 'geo', 'source', 'user_rt_id', 'user_rt',
            'retweet_id', 'reply_to', 'retweet_date', 'translate', 'trans_src',
            'trans_dest']
        We just pick the relevant ones and then remove the rows whose tweet
        text starts with "@".

        Returns:
            DataFrame with the columns id, tweet, date, user_id, username,
            urls, nlikes, nreplies and nretweets.
        """
        self.conf.Pandas = True
        self.conf.Count = True
        self.conf.Limit = self.num_tweets
        self.conf.Since = self.from_date
        self.conf.Until = self.to_date
        self.conf.Hide_output = True
        twint.run.Search(self.conf)
        # NOTE(review): what Tweets_df holds when the search finds nothing is
        # not visible from this file; the column selection below presumably
        # fails in that case - confirm against twint.
        tweet_info = twint.output.panda.Tweets_df
        tweet_info = tweet_info[
            ["id", "tweet", "date", "user_id", "username", "urls",
             "nlikes", "nreplies", "nretweets"]
        ]
        return self.__get_only_tweets(tweet_info)

    @staticmethod
    def __check_date_type(d1, d2):
        """Validate that both dates are strings that look like "yyyy-mm-dd".

        This helper is not called by the other methods of this class.

        Args:
            d1: First date to check.
            d2: Second date to check.

        Raises:
            TypeError: If either date is not a string.
            ValueError: If either date has fewer than two "-"-separated parts.
        """
        hint = " Please make sure the date is a string in this format \"yyyy-mm-dd\""
        # Check each date on its own: ``type(d1) or type(d2)`` only ever
        # evaluates to ``type(d1)``, which would leave d2 unchecked.
        for value in (d1, d2):
            if not isinstance(value, str):
                raise TypeError("Incorrect date type Exception!" + hint)
        for value in (d1, d2):
            # Deliberately loose format check: only the "-" separators are
            # counted, the individual parts are not parsed.
            if len(value.split("-")) < 2:
                raise ValueError("Incorrect date type Exception!" + hint)