File size: 2,751 Bytes
15ca093
e515291
297c37f
 
 
 
 
e515291
5214b07
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec6943b
 
6ee7982
 
 
 
 
 
 
 
 
297c37f
6ee7982
 
 
 
 
 
 
 
 
297c37f
ec6943b
5214b07
15ca093
6ee7982
 
2389616
 
6ee7982
 
2389616
 
6ee7982
 
2389616
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import gradio as gr
from textclassifier import TextClassifier as tc
from functions import functions as f
import time

# Usernames available for manual runs (presumably Twitter handles, given that
# they are paired with a `num_tweets` argument -- TODO confirm). In this file
# the list is only referenced by the commented-out example at the end of the
# `__main__` section.
USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
             'dadgostarnooshi']

def get_summary_statistics(dataframe):
    """
    Render the descriptive statistics of a dataframe as plain text.
    :param dataframe: dataframe to summarise; its ``describe()`` table is used
    :return: str, the ``describe()`` table formatted with ``to_string()``
    """
    return dataframe.describe().to_string()


def summary_categorical(dataframe):
    """
    Build a text report with the value counts of every object-dtype column.

    The columns named 'tweet', 'date' and 'urls' are left out of the report.
    As a side effect the numeric and object-dtype column indexes are printed
    to stdout.
    :param dataframe: dataframe whose object-dtype columns are summarised
    :return: str, a header line followed by one "<column>: <value counts>"
             entry per reported column
    """
    # Column indexes for the two groups; the numeric one is only printed.
    numeric_cols = dataframe._get_numeric_data().columns
    object_cols = dataframe.select_dtypes(include=['object']).columns
    print("Numeric columns: " + str(numeric_cols) + "\n" + "Categorical columns: " + str(object_cols))

    # Columns that are never reported, compared by equality against each name.
    skipped = ('tweet', 'date', 'urls')
    entries = [
        name + ": " + str(dataframe[name].value_counts()) + "\n"
        for name in object_cols
        if name not in skipped
    ]
    return "Summary of categorical variables:\n" + "".join(entries)


def main(From,
         To,
         Username,
         Nbr_Of_Tweets_To_Classify,
         button1):
    """
    Run the text-classifier pipeline for one user and collect its results.
    :param From: forwarded to TextClassifier as ``from_date``
    :param To: forwarded to TextClassifier as ``to_date``
    :param Username: forwarded to TextClassifier as ``user_name``
    :param Nbr_Of_Tweets_To_Classify: converted with ``int()`` and forwarded
           as ``num_tweets``
    :param button1: not used by this function
    :return: tuple of (dataframe produced by the pipeline, its summary
             statistics as a string, the same dataframe sorted by the
             'nlikes' column in descending order)
    """
    tweet_count = int(Nbr_Of_Tweets_To_Classify)
    classifier = tc.TextClassifier(from_date=From, to_date=To, user_name=Username, num_tweets=tweet_count)
    classifier.run_main_pipeline()

    result_frame = classifier.get_dataframe()
    stats_text = get_summary_statistics(result_frame)
    most_liked_first = result_frame.sort_values(by=['nlikes'], ascending=False)
    return result_frame, stats_text, most_liked_first

if __name__ == "__main__":
    # Build the Gradio UI around main(): four free-text inputs plus a textbox
    # labelled "Run" line up with main()'s five parameters, and main()'s three
    # return values fill the dataframe / text / dataframe outputs.
    demo = gr.Interface(
        fn=main,
        inputs=['text', 'text', 'text', 'text', gr.components.Textbox(label="Run")],
        outputs=["dataframe", "text", "dataframe"],
    )
    demo.launch()

    # Manual run without the UI, kept commented out for reference:
    # text_classifier = tc.TextClassifier(from_date='2022-01-01', to_date='2022-01-25', user_name=USER_LIST[1], num_tweets=20)
    # text_classifier.run_main_pipeline()
    # print(get_summary_statistics(text_classifier.get_dataframe()))
    # print(type(get_summary_statistics(text_classifier.get_dataframe())))
    # print(summary_categorical(text_classifier.get_dataframe()))