Spaces:
Runtime error
Runtime error
File size: 2,751 Bytes
15ca093 e515291 297c37f e515291 5214b07 ec6943b 6ee7982 297c37f 6ee7982 297c37f ec6943b 5214b07 15ca093 6ee7982 2389616 6ee7982 2389616 6ee7982 2389616 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
from textclassifier import TextClassifier as tc
from functions import functions as f
import time
# Account names available for a run; the commented-out manual invocation at
# the bottom of this file passes one of them as ``user_name``.
USER_LIST = [
    'jimmieakesson',
    'BuschEbba',
    'annieloof',
    'JohanPehrson',
    'bolund',
    'martastenevi',
    'SwedishPM',
    'dadgostarnooshi',
]
def get_summary_statistics(dataframe):
    """
    Render the descriptive statistics of a dataframe as plain text.
    :param dataframe: dataframe whose ``describe()`` table should be rendered
    :return: str, the ``describe()`` table formatted with ``to_string()``
    """
    return dataframe.describe().to_string()
def summary_categorical(dataframe):
    """
    Build a text summary with the value counts of each object-dtype column.

    The columns 'tweet', 'date' and 'urls' are left out of the summary. As a
    side effect, the numeric and object-dtype column indexes are printed to
    stdout.
    :param dataframe: dataframe to summarise
    :return: str, a header line followed by one "<column>: <value counts>"
        entry per summarised column
    """
    excluded = ('tweet', 'date', 'urls')

    # Split the columns by kind; the numeric ones are only reported in the
    # diagnostic print below.
    numeric_cols = dataframe._get_numeric_data().columns
    object_cols = dataframe.select_dtypes(include=['object']).columns
    print("Numeric columns: " + str(numeric_cols) + "\n" + "Categorical columns: " + str(object_cols))

    # One entry per remaining object-dtype column, each terminated by a newline.
    entries = [
        name + ": " + str(dataframe[name].value_counts()) + "\n"
        for name in object_cols
        if name not in excluded
    ]
    return "Summary of categorical variables:\n" + "".join(entries)
def main(From,
         To,
         Username,
         Nbr_Of_Tweets_To_Classify,
         button1):
    """
    Run the classification pipeline for one user and collect its outputs.
    :param From: forwarded to the classifier as ``from_date``
    :param To: forwarded to the classifier as ``to_date``
    :param Username: forwarded to the classifier as ``user_name``
    :param Nbr_Of_Tweets_To_Classify: converted with ``int()`` and forwarded
        as ``num_tweets``
    :param button1: not used by this function
    :return: tuple of (dataframe, summary statistics string, dataframe sorted
        by 'nlikes' in descending order)
    """
    classifier = tc.TextClassifier(
        from_date=From,
        to_date=To,
        user_name=Username,
        num_tweets=int(Nbr_Of_Tweets_To_Classify),
    )
    classifier.run_main_pipeline()

    result = classifier.get_dataframe()
    stats_text = get_summary_statistics(result)
    most_liked_first = result.sort_values(by=['nlikes'], ascending=False)
    return result, stats_text, most_liked_first
if __name__ == "__main__":
    # NOTE(review): `date` is not referenced anywhere in the visible code.
    from datetime import date

    # Four free-text inputs plus a textbox labelled "Run"; they map
    # positionally onto the five parameters of `main`.
    input_components = ['text', 'text', 'text', 'text', gr.components.Textbox(label="Run")]
    # Matches the three values `main` returns.
    output_components = ["dataframe", "text", "dataframe"]

    demo = gr.Interface(fn=main, inputs=input_components, outputs=output_components)
    demo.launch()
# text_classifier = tc.TextClassifier(from_date='2022-01-01', to_date='2022-01-25', user_name=USER_LIST[1], num_tweets=20)
# text_classifier.run_main_pipeline()
# print(get_summary_statistics(text_classifier.get_dataframe()))
# print(type(get_summary_statistics(text_classifier.get_dataframe())))
# print(summary_categorical(text_classifier.get_dataframe()))
|