"""Gradio app that plots pairwise Cramér's V for the text columns of a CSV."""

import os

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as ss
import seaborn as sns
from scipy.stats import chi2_contingency


def cramers_V(var1, var2):
    """Return Cramér's V, a 0..1 measure of association, for two variables.

    Args:
        var1: First categorical variable (anything ``pd.crosstab`` accepts,
            e.g. a ``pd.Series``).
        var2: Second categorical variable, aligned with ``var1``.

    Returns:
        ``sqrt(chi2 / (n * min(rows - 1, cols - 1)))`` as a float, or NaN
        when the contingency table has fewer than two rows or two columns
        (the statistic is undefined in that case).
    """
    # Contingency table of observed co-occurrence counts.
    crosstab = np.array(pd.crosstab(var1, var2, rownames=None, colnames=None))

    # Smaller table dimension minus one; zero or negative means one of the
    # variables has a single category (or no data), which would otherwise
    # lead to a 0/0 division below.
    mini = min(crosstab.shape) - 1
    if mini < 1:
        return float("nan")

    # Yates' continuity correction is disabled: it only kicks in for 2x2
    # tables and would keep a binary variable from scoring 1.0 with itself.
    stat = chi2_contingency(crosstab, correction=False)[0]
    obs = np.sum(crosstab)  # Total number of observations.

    # The square root turns the normalised chi-square into V itself.
    return float(np.sqrt(stat / (obs * mini)))


def predict(file_obj):
    """Build a Cramér's V heatmap for the object-dtype columns of a CSV file.

    Args:
        file_obj: Uploaded-file object whose ``name`` attribute is the path
            of the CSV to read.

    Returns:
        The matplotlib figure holding the annotated heatmap.

    Raises:
        ValueError: If the CSV has no object-dtype columns to compare.
    """
    df = pd.read_csv(file_obj.name)
    cat_df = df.select_dtypes(include=['object'])
    if cat_df.shape[1] == 0:
        raise ValueError(
            "The uploaded file contains no categorical (text) columns."
        )

    # Full square matrix: entry [i][j] is V(column i, column j), rounded to
    # two decimals for display.
    rows = [
        [round(cramers_V(cat_df[var1], cat_df[var2]), 2) for var2 in cat_df]
        for var1 in cat_df
    ]
    df_final = pd.DataFrame(
        np.array(rows), columns=cat_df.columns, index=cat_df.columns
    )

    # Drop the figure left over from the previous request, then draw on an
    # explicit figure instead of relying on pyplot's implicit current figure.
    plt.close()
    fig, ax = plt.subplots()
    sns.heatmap(data=df_final, annot=True, ax=ax)
    return fig


iface = gr.Interface(
    predict,
    inputs="file",
    outputs="plot",
    examples=["StudentsPerformance.csv"],
    theme="dark-peach",
    title='Correlation Tool for Categorical features',
    description="This tool identifies and explains the correlation between categorical features.",
)

# Only start the web server when executed as a script, so importing this
# module (e.g. to reuse cramers_V) does not launch the app.
if __name__ == "__main__":
    iface.launch(inline=False)