import gradio as gr
from textclassifier import TextClassifier as tc
from functions import functions as f
import time

# User names that can be passed to ``TextClassifier`` as ``user_name``.
USER_LIST = ['jimmieakesson', 'BuschEbba', 'annieloof', 'JohanPehrson', 'bolund', 'martastenevi', 'SwedishPM',
             'dadgostarnooshi']

# Object-dtype columns that are left out of the categorical summary.
_SKIPPED_CATEGORICAL_COLUMNS = frozenset({'tweet', 'date', 'urls'})


def main(from_date, to_date, user_name, num_tweets=20):
    """
    Run the classification pipeline for one user and summarise the result.

    Builds a ``TextClassifier`` from the arguments, runs its main pipeline and
    reads back the resulting dataframe.

    :param from_date: start date, forwarded unchanged to ``TextClassifier``
    :param to_date: end date, forwarded unchanged to ``TextClassifier``
    :param user_name: user name, forwarded unchanged to ``TextClassifier``
    :param num_tweets: forwarded to ``TextClassifier`` as ``num_tweets``
        (defaults to 20, the value that used to be hard-coded here)
    :return: tuple ``(dataframe, summary)`` where ``summary`` is the string
        produced by :func:`get_summary_statistics` for that dataframe
    """
    text_classifier = tc.TextClassifier(from_date=from_date, to_date=to_date,
                                        user_name=user_name, num_tweets=num_tweets)
    text_classifier.run_main_pipeline()
    dataframe = text_classifier.get_dataframe()
    return dataframe, get_summary_statistics(dataframe)


def get_summary_statistics(dataframe):
    """
    Return the ``describe()`` summary statistics of a dataframe as text.

    :param dataframe: dataframe to summarise
    :return: str, the ``to_string()`` rendering of ``dataframe.describe()``
    """
    return dataframe.describe().to_string()


def summary_categorical(dataframe):
    """
    Return a text summary of the categorical (object-dtype) columns.

    For every object-dtype column except 'tweet', 'date' and 'urls', the
    column's ``value_counts()`` is appended to the summary. As a side effect
    the numeric and categorical column indexes are printed to stdout.

    :param dataframe: dataframe to summarise
    :return: str, a header line followed by one ``"<column>: <value counts>"``
        entry per summarised column
    """
    # NOTE(review): ``_get_numeric_data`` is a private pandas method; it is kept
    # because it only feeds the diagnostic print below and an exactly equivalent
    # public call has not been confirmed.
    numeric_columns = dataframe._get_numeric_data().columns
    categorical_columns = dataframe.select_dtypes(include=['object']).columns
    print(f"Numeric columns: {numeric_columns!s}\nCategorical columns: {categorical_columns!s}")

    # Build all entries first and join once instead of growing a string with +=.
    entries = [
        f"{column}: {dataframe[column].value_counts()!s}\n"
        for column in categorical_columns
        if column not in _SKIPPED_CATEGORICAL_COLUMNS
    ]
    return "Summary of categorical variables:\n" + "".join(entries)


if __name__ == "__main__":
    # Not referenced by the code below; retained from the original script.
    from datetime import date

    # Alternative entry point: serve ``main`` through a Gradio interface.
    # demo = gr.Interface(
    #     fn=main,
    #     inputs=['text', 'text', 'text'],
    #     outputs=["dataframe", "text"],
    # )
    # demo.launch()

    text_classifier = tc.TextClassifier(from_date='2019-07-01', to_date='2022-07-31',
                                        user_name=USER_LIST[1], num_tweets=20)
    text_classifier.run_main_pipeline()

    # Fetch the dataframe and compute the statistics once, then reuse them for
    # every print (assumes ``get_dataframe`` is a plain getter, as ``main`` does).
    dataframe = text_classifier.get_dataframe()
    summary_statistics = get_summary_statistics(dataframe)
    print(summary_statistics)
    print(type(summary_statistics))
    print(summary_categorical(dataframe))