import pandas as pd
import numpy as np
import datetime
import tqdm
import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import snscrape.modules.twitter as sntwitter

# Select the non-interactive Agg backend: figures are only rendered off-screen
# and handed to Gradio, so no GUI toolkit / display is required.
matplotlib.use("Agg")

# Custom stylesheet passed to gr.Blocks: hides the footer, changes the
# max-height of two utility classes and recolours one hover state.
# Raw string on purpose: the backslashes are CSS escapes for "[", "]" and ":"
# inside class names and must reach the browser verbatim. In a normal string
# literal "\[" / "\:" are invalid escape sequences and trigger a warning.
# NOTE(review): the `.min-h-[15rem]` rule sets max-height, not min-height —
# confirm that this is intended.
css = r"""
footer {display:none !important}
.max-h-\[30rem\] {max-height: 15rem !important;}
.min-h-\[15rem\] {max-height: 5rem !important;}
.hover\:bg-orange-50:hover {
    --tw-bg-opacity: 1 !important;
    background-color: rgb(229,225,255) !important;
}
"""

with gr.Blocks(title="Twitter Temporal Insights | Data Science Dojo", css = css) as demo:
  def search(text,username,since,until,retweet,replies):
    """Build the Twitter search query and the CSV file name for one request.

    Args:
      text: Free-text part of the query; may be ''.
      username: Account for the ``from:`` operator; '' omits the operator.
      since: Value for the ``since:`` operator ('yyyy-mm-dd'); '' means
        seven days before ``until``.
      until: Value for the ``until:`` operator ('yyyy-mm-dd'); '' means today.
      retweet: Truthy to append ``exclude:retweets``.
      replies: Truthy to append ``exclude:replies``.

    Returns:
      A ``(query, filename)`` tuple. ``filename`` is
      ``<since>_<until>[_<username>][_<text>].csv`` with characters that are
      unsafe in file names replaced by ``_``.

    Raises:
      ValueError: if ``since`` is '' and ``until`` is not a 'yyyy-mm-dd' date.
    """
    date_format = '%Y-%m-%d'

    # Fill in the default date window first; `since` defaults relative to `until`.
    if until == '':
        until = datetime.date.today().strftime(date_format)
    if since == '':
        week_earlier = datetime.datetime.strptime(until, date_format) - datetime.timedelta(days=7)
        since = week_earlier.strftime(date_format)

    q = text
    if username != '':
        q += f" from:{username}"
    q += f" until:{until} since:{since}"
    if retweet:
        q += " exclude:retweets"
    if replies:
        q += " exclude:replies"

    if username != '' and text != '':
        stem = f"{since}_{until}_{username}_{text}"
    elif username != '':
        stem = f"{since}_{until}_{username}"
    else:
        stem = f"{since}_{until}_{text}"

    # Every component of the stem comes straight from the web form. Neutralise
    # path separators and other reserved/control characters so the CSV is always
    # written as a plain file in the working directory (no path traversal, no
    # invalid file names).
    unsafe = '\\/:*?"<>|'
    safe_stem = ''.join('_' if (ch in unsafe or ord(ch) < 32) else ch for ch in stem)
    filename = f"{safe_stem}.csv"

    print(filename)
    return q,filename

  def _new_axes():
    """Return a fresh ``(figure, axes)`` pair that is not registered with pyplot.

    ``plt.subplots()`` keeps every figure alive in pyplot's global registry
    until it is explicitly closed, and pyplot's "current axes" is shared
    process-wide. Building the Figure directly avoids leaking 25 figures per
    request and keeps concurrent requests from drawing on each other's axes.
    """
    figure = matplotlib.figure.Figure()
    return figure, figure.subplots()

  def _year_of(date_text, fallback):
    """Return the leading 4-character year of ``date_text``, or ``fallback`` if it has none."""
    try:
        return int(date_text[0:4])
    except ValueError:
        # Empty (or non-numeric) form field: fall back to the value from the data.
        return int(fallback)

  def _year_ticks(frame, since='', until=''):
    """Return the integer year ticks for the Year charts.

    The requested ``since``/``until`` years are used when present; otherwise
    the first/last value of ``frame['Year']`` is used.
    """
    first_year = _year_of(since, frame['Year'].min())
    last_year = _year_of(until, frame['Year'].max())
    return np.arange(first_year, last_year + 1, 1)

  def _column_figures(values, label, ticks=None, note_offset=(0.05, 5), note_size=12, tick_label_style=None):
    """Draw the five standard charts for one time column.

    Args:
      values: Series holding the column to visualise.
      label: X-axis label for the line chart.
      ticks: Optional explicit x ticks for every chart except the bar chart.
      note_offset: ``(dx, dy)`` offset of the count annotation above each bar.
      note_size: Font size of the count annotations.
      tick_label_style: Optional text properties applied to the bar chart's
        x tick labels (e.g. font size and rotation).

    Returns:
      ``[bar chart, line chart, histogram with KDE, KDE, KDE with bw_adjust=3]``.
    """
    figures = []
    dx, dy = note_offset

    # Bar chart of occurrences, each bar annotated with its count.
    figure, axes = _new_axes()
    sns.countplot(x=values, ax=axes)
    for bar in axes.patches:
        axes.annotate(str(int(bar.get_height())), (bar.get_x() + dx, bar.get_height() + dy), fontsize=note_size)
    if tick_label_style:
        for tick_label in axes.get_xticklabels():
            tick_label.set(**tick_label_style)
    figures.append(figure)

    # Line chart of the same counts.
    counts = values.value_counts()
    figure, axes = _new_axes()
    sns.lineplot(x=counts.index, y=counts.values, ax=axes)
    axes.set_xlabel(label)
    axes.set_ylabel('Count')
    if ticks is not None:
        axes.set_xticks(ticks)
    figures.append(figure)

    # Discrete histogram with a KDE overlay.
    figure, axes = _new_axes()
    sns.histplot(x=values, stat='count', binwidth=1, kde=True, discrete=True, ax=axes)
    if ticks is not None:
        axes.set_xticks(ticks)
    figures.append(figure)

    # Density estimate with the default bandwidth, then a smoother one.
    for extra in ({}, {'bw_adjust': 3}):
        figure, axes = _new_axes()
        sns.kdeplot(x=values, fill=True, ax=axes, **extra)
        if ticks is not None:
            axes.set_xticks(ticks)
        figures.append(figure)

    return figures

  def _dashboard_figures(frame, year_ticks):
    """Return the 25 figures (Year, Month, Week, MonthDay, Hour; five each) for ``frame``."""
    figures = []
    figures += _column_figures(frame['Year'], 'Year', ticks=year_ticks)
    figures += _column_figures(frame['Month'], 'Month', ticks=np.arange(1, 13, 1))
    figures += _column_figures(
        frame['Week'], 'Week', note_offset=(0.005, 1), note_size=10,
        tick_label_style={'fontsize': 7, 'rotation': 45, 'horizontalalignment': 'center'})
    figures += _column_figures(
        frame['MonthDay'], 'MonthDay',
        tick_label_style={'fontsize': 10, 'rotation': 45, 'horizontalalignment': 'center'})
    figures += _column_figures(frame['Hour'], 'Hour', note_offset=(0.05, 10))
    return figures

  def file_viz(dataset):
    """Visualise a previously downloaded tweets CSV.

    Args:
      dataset: Uploaded file object; its ``name`` attribute is the CSV path.
        ``None`` when nothing was uploaded.

    Returns:
      ``[csv_path, f, f2, ..., f25]``: the CSV path followed by the 25 figures
      in Year, Month, Week, MonthDay, Hour order.

    Raises:
      gr.Error: if no file was uploaded, the CSV has no rows, or one of the
        Year/Month/Week/MonthDay/Hour columns is missing.
    """
    if dataset is None:
        raise gr.Error("Please upload a CSV file first.")
    print(dataset.name)
    tweets_df1 = pd.read_csv(dataset.name)

    required = ('Year', 'Month', 'Week', 'MonthDay', 'Hour')
    missing = [column for column in required if column not in tweets_df1.columns]
    if missing:
        raise gr.Error(f"The uploaded CSV is missing required column(s): {', '.join(missing)}")
    if tweets_df1.empty:
        raise gr.Error("The uploaded CSV file contains no tweets.")

    # The data is not modified, so the uploaded file is returned as-is instead
    # of being written back to disk.
    figures = _dashboard_figures(tweets_df1, _year_ticks(tweets_df1))
    return [dataset.name, *figures]

  def scrape_tweets(text,username,since,until,retweets,replies,count,progress=gr.Progress()):
    """Scrape tweets matching the form inputs, save them as CSV and chart them.

    Args:
      text, username, since, until, retweets, replies: Forwarded to ``search``
        to build the query string and the CSV file name.
      count: Maximum number of tweets to collect; ``-1`` collects everything
        the scraper yields.
      progress: Gradio progress tracker wrapped around the scraping loop.

    Returns:
      ``[csv_path, f, f2, ..., f25]``: the written CSV path followed by the 25
      figures in Year, Month, Week, MonthDay, Hour order.

    Raises:
      gr.Error: if the scraper returned no tweets.
    """
    print(text,username,since,until,retweets,replies,count)
    q,filename = search(text,username,since,until,retweets,replies)

    # Collect one row per tweet, stopping early once `count` tweets were taken.
    rows = []
    scraper = sntwitter.TwitterSearchScraper(q)
    for i,tweet in progress.tqdm(enumerate(scraper.get_items())):
        if count != -1 and i >= count:
            break
        rows.append([tweet.date, tweet.id, tweet.rawContent, tweet.user.username, tweet.lang,
                     tweet.hashtags, tweet.replyCount, tweet.retweetCount, tweet.likeCount, tweet.quoteCount])

    if not rows:
        # Without this guard the `.dt` accessor below fails on an empty,
        # object-typed column with an unhelpful AttributeError.
        raise gr.Error("No tweets were found for this query.")

    tweets_df1 = pd.DataFrame(rows, columns=['DateTime', 'TweetId', 'Text', 'Username', 'Language',
                                             'Hashtags', 'ReplyCount', 'RetweetCount', 'LikeCount', 'QuoteCount'])

    # Break the timestamp into the calendar parts the dashboards group by.
    stamps = tweets_df1['DateTime']
    tweets_df1['Hour'] = stamps.dt.hour
    tweets_df1['Year'] = stamps.dt.year
    tweets_df1['Month'] = stamps.dt.month
    tweets_df1['MonthName'] = stamps.dt.month_name()
    tweets_df1['MonthDay'] = stamps.dt.day
    tweets_df1['DayName'] = stamps.dt.day_name()
    tweets_df1['Week'] = stamps.dt.isocalendar().week
    tweets_df1['Date'] = stamps.dt.date
    tweets_df1['Time'] = stamps.dt.time

    # Final CSV layout; the raw DateTime column is dropped by the selection.
    tweets_df1 = tweets_df1[['Date','Time','Username','Text','Language','Hashtags','ReplyCount','RetweetCount',
                             'LikeCount','QuoteCount','Hour','Year','Month','MonthName','MonthDay','DayName',
                             'Week','TweetId']]
    tweets_df1.to_csv(filename,index=False)

    # `since`/`until` may be '' (search() fills its own defaults), so the year
    # ticks fall back to the years actually present in the data.
    figures = _dashboard_figures(tweets_df1, _year_ticks(tweets_df1, since, until))
    return [filename, *figures]
    #gr.Markdown("Start typing below and then click **Run** to see the output.")
  def _plot_tab(title):
    """Add a tab with five empty plots (rows of 2, 2 and 1) and return them in creation order."""
    with gr.Tab(title):
        with gr.Row():
            top_left = gr.Plot()
            top_right = gr.Plot()
        with gr.Row():
            middle_left = gr.Plot()
            middle_right = gr.Plot()
        with gr.Row():
            bottom = gr.Plot()
    return [top_left, top_right, middle_left, middle_right, bottom]

  # Input tab: scraping form, CSV upload alternative and the CSV download slot.
  with gr.Tab("Input"):
    with gr.Row():
        text = gr.Textbox(label="Query text to be matched (Optional)",max_lines=1)
        username = gr.Textbox(label="Twitter Username",max_lines=1,value = 'DataScienceDojo')
    with gr.Row():
        since = gr.Textbox(label="Start Date",placeholder='yyyy-mm-dd',max_lines=1,value = '2021-01-01')
        until = gr.Textbox(label="End Date",max_lines=1,placeholder='yyyy-mm-dd',value = '2022-12-31')
    with gr.Row():
        retweets = gr.Checkbox(label="Exclude Retweets?",value = True)
        replies = gr.Checkbox(label="Exclude Replies",value = True)
    with gr.Row():
        count = gr.Slider(label="Count (-1 to retrieve all tweets. Increase the count for better visualizations. 5000 Tweets are retrieved in approximately 100s!)",value=-1, minimum=-1,maximum = 100000, step=100)
    with gr.Row():
        submit_btn = gr.Button("Submit")
    with gr.Row():
        # The heading is now closed with </h1> (it previously opened a second <h1>).
        gr.Markdown("""<h1 style= "text-align:center; z-index: 14; font-family: var(--font_default); font-size: 18px; font-weight: 500; color: rgb(9, 23, 71); opacity: 1;">OR Upload File</h1>""")
    with gr.Row():
        filein = gr.File(label = "Upload previously downloaded csv file. Example given below!")
    with gr.Row():
        submit_file = gr.Button("Submit File")
    with gr.Row():
        out0 = gr.File(label = "Download CSV of extracted tweets")

  # Tabs are displayed from the finest to the coarsest time unit.
  hour_plots = _plot_tab("Visualization by Hour")
  day_plots = _plot_tab("Visualization by Day")
  week_plots = _plot_tab("Visualization by Week")
  month_plots = _plot_tab("Visualization by Month")
  year_plots = _plot_tab("Visualization by Year")

  # Both handlers return the CSV path followed by the Year, Month, Week,
  # MonthDay and Hour figures (five each), so the outputs follow that order.
  dashboard_outputs = [out0, *year_plots, *month_plots, *week_plots, *day_plots, *hour_plots]

  gr.Examples(
      examples=[["DSD_Tweets.csv"]],
      fn = file_viz,
      inputs = filein,
      outputs=dashboard_outputs
      )
  submit_file.click(fn=file_viz,inputs = filein, outputs=dashboard_outputs)
  submit_btn.click(fn=scrape_tweets, inputs=[text,username,since,until,retweets,replies,count], outputs=dashboard_outputs)


# Enable Gradio's event queue with concurrency_count=5, then start the app server.
demo.queue(concurrency_count=5).launch()