File size: 4,119 Bytes
15ca093
e515291
86bdc44
297c37f
 
 
 
 
e515291
6015805
 
86bdc44
6015805
 
ec6943b
 
6015805
86bdc44
6015805
 
 
 
 
 
 
 
 
 
 
 
 
86bdc44
 
6ee7982
 
 
6015805
6ee7982
6015805
 
86bdc44
6015805
 
 
 
 
297c37f
6ee7982
 
 
6015805
6ee7982
 
 
 
6015805
 
 
297c37f
fae15d7
15ca093
6ee7982
86bdc44
 
6ee7982
2389616
6015805
 
 
 
 
 
86bdc44
 
2389616
6015805
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
from textclassifier import TextClassifier as tc
import pandas as pd
from functions import functions as f
import time

# Display name of each party leader -> the Twitter handle used for that person.
# Insertion order matters: USER_LIST below is derived from it.
UserNameDict = {
    'Jimmie Åkesson': 'jimmieakesson',
    'Ebba Busch': 'BuschEbba',
    'Annie Lööf': 'annieloof',
    'Johan Pehrson': 'JohanPehrson',
    'Per Bolund': 'bolund',
    'Märta Stenevi': 'martastenevi',
    'Magdalena Andersson': 'SwedishPM',
    'Nooshi Dadgostar': 'dadgostarnooshi',
}

# Twitter handles only, in the same order as the display names above.
USER_LIST = list(UserNameDict.values())

# Canonical column order for the result tables shown in the UI.
Columns = [
    'username',
    'nlikes',
    'nreplies',
    'nretweets',
    'main_topic',
    'sub_topic',
    'sentiment',
    'target',
    'tweet',
    'date',
    'urls',
    'id',
    'class_tuple',
    'user_id',
]


def show_all_stats(ListChoices, SeeFullStats):
    """Return the chosen columns of the stored Twitter dataset, if requested.

    Args:
        ListChoices: Column names to select from the stored CSV.
        SeeFullStats: When truthy, the CSV is loaded and the selected columns
            are returned; otherwise an empty frame is returned.

    Returns:
        A pandas DataFrame: the selected columns of
        ``<tc.ROOT_PATH>/data/twitterdata.csv``, or an empty DataFrame when
        ``SeeFullStats`` is falsy.
    """
    # Guard first so the CSV is only read from disk when its contents are
    # actually going to be shown.
    if not SeeFullStats:
        return pd.DataFrame()

    dataframe = pd.read_csv("{}/data/twitterdata.csv".format(tc.ROOT_PATH))
    return dataframe[ListChoices]


def fixChoicesCorrectOrder(Choices):
    """Return the entries of ``Columns`` that appear in ``Choices``.

    The result follows the order of ``Columns`` rather than the order in
    which the choices were supplied; anything in ``Choices`` that is not a
    known column is dropped.
    """
    ordered = []
    for column in Columns:
        if column in Choices:
            ordered.append(column)
    return ordered


def MatchNameToUser(Name):
    """Look up the Twitter handle stored in ``UserNameDict`` for ``Name``.

    Raises:
        KeyError: If ``Name`` is not a key of ``UserNameDict``.
    """
    handle = UserNameDict[Name]
    return handle


def main(From,
         To,
         Username,
         UserNameChoices,
         Nbr_Of_Tweets_To_Classify,
         ListChoices,
         SeeFullStats
         ):
    """Run the text classifier for one account and return the result tables.

    Args:
        From: Start date, passed to the classifier as ``from_date``.
        To: End date, passed to the classifier as ``to_date``.
        Username: Free-text account name. Takes precedence when non-blank.
        UserNameChoices: Display names ticked in the UI. Only the first one
            is used, and only when ``Username`` is blank.
        Nbr_Of_Tweets_To_Classify: Number of tweets; converted with ``int``.
        ListChoices: Column names to show in the output tables.
        SeeFullStats: Whether to also return the stored full dataset.

    Returns:
        A tuple ``(selection, full_stats)`` of DataFrames: the classifier
        output restricted to the chosen columns, and the result of
        ``show_all_stats`` for the same columns.

    Raises:
        ValueError: If neither a username nor a predefined name is given, or
            if ``Nbr_Of_Tweets_To_Classify`` is not an integer string.
        KeyError: If the first entry of ``UserNameChoices`` is not a known
            display name.
    """
    def WhoToScrape():
        # Treat None / whitespace-only input the same as an empty textbox.
        typed_name = (Username or "").strip()
        if typed_name:
            return typed_name
        # Without this check an empty selection fails with a bare IndexError.
        if not UserNameChoices:
            raise ValueError(
                "No account selected: enter a username or tick one of the predefined names.")
        return MatchNameToUser(UserNameChoices[0])

    # Resolve the account first so bad input fails before any heavy work.
    account = WhoToScrape()

    # NOTE(review): a single account string is passed as `user_list`, exactly
    # as before -- confirm that TextClassifier accepts a plain string here.
    text_classifier = tc.TextClassifier(
        from_date=From,
        to_date=To,
        user_list=account,
        num_tweets=int(Nbr_Of_Tweets_To_Classify))

    text_classifier.run_main_pipeline()
    dataframe = text_classifier.get_dataframe()
    # Keep only the known columns, in the canonical order.
    dataframe = dataframe[Columns]

    # Order the requested columns once and reuse for both tables.
    selected_columns = fixChoicesCorrectOrder(ListChoices)
    return dataframe[selected_columns], show_all_stats(selected_columns, SeeFullStats)


if __name__ == "__main__":
    # Build and launch the Gradio UI. The inputs are listed in the same order
    # as the parameters of `main`, and the two outputs match its return tuple.
    demo = gr.Interface(
        # Help text rendered with the interface; plain <br> separators only
        # (the previous text carried stray, unmatched </a> closing tags).
        article="<br>From = The date from which you want to start the analysis.<br> To = The date to which "
                "you want to end "
                "the analysis.<br> Username = The username of the user you want to analyze.<br> How many "
                "tweets to classify = "
                "The number of tweets you want to analyze.<br>",
        analytics_enabled=False,
        theme="default",
        title="Twitter data analysis",
        fn=main,
        inputs=[gr.components.Textbox(label="From", value='2022-01-01'),
                gr.components.Textbox(label="To", value='2022-01-25'),
                gr.components.Textbox(label="Username", value="BuschEbba"),
                # Predefined names; `main` only consults these when the
                # Username box is empty. Choices come from UserNameDict so
                # every option is guaranteed to map to a handle.
                gr.components.Checkboxgroup(choices=list(UserNameDict), label=""),
                gr.components.Textbox(label="How many Tweets to Classify", value="20"),
                # All known columns are selectable; the first ten
                # ('username' .. 'date') are ticked by default.
                gr.components.Checkboxgroup(label="Options",
                                            choices=list(Columns),
                                            value=Columns[:10]
                                            ),
                gr.components.Checkbox(label="Show full statistics")
                ],
        outputs=[
            gr.components.DataFrame(label="Summary statistics of the interval you selected", max_rows=None),
            gr.components.DataFrame(label="Summary statistics of the total database", max_rows=None, )])

    demo.launch(share=False)