# Spaces:
# Runtime error
# Runtime error
import gradio as gr | |
from textclassifier import TextClassifier as tc | |
from functions import functions as f | |
import time | |
# Account names that can be passed as `user_name` to the classifier
# (the manual-run example at the bottom of this file picks USER_LIST[1]).
USER_LIST = [
    'jimmieakesson',
    'BuschEbba',
    'annieloof',
    'JohanPehrson',
    'bolund',
    'martastenevi',
    'SwedishPM',
    'dadgostarnooshi',
]
def get_summary_statistics(dataframe) -> str:
    """
    Render the descriptive statistics of a dataframe as plain text.

    The statistics are whatever ``dataframe.describe()`` produces, formatted with ``to_string()``.
    A dataframe without any columns cannot be described (pandas raises ``ValueError``), so for such a
    frame its own ``to_string()`` rendering is returned instead of propagating the error.
    :param dataframe: pandas DataFrame to summarise
    :return: str
    """
    if len(dataframe.columns) == 0:
        # describe() rejects a column-less frame; fall back to the frame's plain rendering.
        return dataframe.to_string()
    return dataframe.describe().to_string()
def summary_categorical(dataframe) -> str:
    """
    Build a text summary of the categorical (object-dtype) columns of a dataframe.

    For every object-dtype column except 'tweet', 'date' and 'urls', the column's ``value_counts()`` is
    appended as ``"<column>: <counts>\\n"``. The detected numeric and categorical column indexes are also
    printed to stdout.
    :param dataframe: pandas DataFrame to summarise
    :return: str starting with "Summary of categorical variables:\\n", followed by one entry per column
    """
    # Object-dtype columns that are skipped in the summary.
    skipped_columns = ('tweet', 'date', 'urls')

    # NOTE(review): _get_numeric_data is a private pandas helper; it is only used for the debug print below.
    numeric_columns = dataframe._get_numeric_data().columns
    categorical_columns = dataframe.select_dtypes(include=['object']).columns
    print("Numeric columns: " + str(numeric_columns) + "\n" + "Categorical columns: " + str(categorical_columns))

    # Join once instead of growing a string inside the loop; formatting the label through the f-string
    # also keeps non-string column labels from raising on concatenation.
    summary = "".join(
        f"{column}: {dataframe[column].value_counts()!s}\n"
        for column in categorical_columns
        if column not in skipped_columns
    )
    return "Summary of categorical variables:\n" + summary
def main(From,
         To,
         Username,
         Nbr_Of_Tweets_To_Classify,
         button1):
    """
    Run the classification pipeline for one user and return the results for display.

    Builds a ``TextClassifier`` from the given arguments, runs its main pipeline and fetches the
    resulting dataframe. Used as the ``fn`` callback of the Gradio interface.
    :param From: passed to the classifier as ``from_date``
    :param To: passed to the classifier as ``to_date``
    :param Username: passed to the classifier as ``user_name``
    :param Nbr_Of_Tweets_To_Classify: converted with ``int()`` and passed as ``num_tweets``;
        a value that ``int()`` cannot parse raises ``ValueError``
    :param button1: not used by this function; receives the value of the fifth interface input
    :return: tuple of (dataframe, summary statistics as str, dataframe sorted by 'nlikes' descending)
    """
    classifier = tc.TextClassifier(
        from_date=From,
        to_date=To,
        user_name=Username,
        num_tweets=int(Nbr_Of_Tweets_To_Classify),
    )
    classifier.run_main_pipeline()
    dataframe = classifier.get_dataframe()

    summary = get_summary_statistics(dataframe)
    most_liked_first = dataframe.sort_values(by=['nlikes'], ascending=False)
    return dataframe, summary, most_liked_first
if __name__ == "__main__":
    # NOTE(review): `date` is not referenced anywhere in the visible code; kept in case other code relies on it.
    from datetime import date

    # The inputs map positionally onto main's parameters: four free-text fields for From, To, Username and
    # Nbr_Of_Tweets_To_Classify, then a textbox labelled "Run" that feeds the unused `button1` parameter.
    # The three outputs match main's return tuple: dataframe, summary text, dataframe.
    demo = gr.Interface(
        fn=main,
        inputs=['text', 'text', 'text', 'text', gr.components.Textbox(label="Run")],
        outputs=["dataframe", "text", "dataframe"],
    )
    demo.launch()
# text_classifier = tc.TextClassifier(from_date='2022-01-01', to_date='2022-01-25', user_name=USER_LIST[1], num_tweets=20)
# text_classifier.run_main_pipeline()
# print(get_summary_statistics(text_classifier.get_dataframe()))
# print(type(get_summary_statistics(text_classifier.get_dataframe())))
# print(summary_categorical(text_classifier.get_dataframe()))