import gradio as gr
import pandas as pd
import numpy as np
import pandas as pd
import scipy.stats as ss
import seaborn as sns
from scipy.stats import chi2_contingency
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
def cramers_V(var1, var2):
    """Return Cramér's V, a measure of association between two categorical variables.

    The statistic is ``sqrt(chi2 / (n * (min(rows, cols) - 1)))`` where
    ``chi2`` is Pearson's chi-squared statistic of the contingency table of
    the two variables and ``n`` is the number of observations in that table.
    It ranges from 0 (no association) to 1 (perfect association).

    Args:
        var1: First categorical variable; passed straight to ``pd.crosstab``
            (a pandas Series or NumPy array).
        var2: Second categorical variable, paired with ``var1``.

    Returns:
        Cramér's V as a float, or ``nan`` when either variable has only one
        observed category (the denominator is zero, so V is undefined).
    """
    crosstab = np.array(pd.crosstab(var1, var2, rownames=None, colnames=None))
    mini = min(crosstab.shape) - 1  # min(rows, cols) - 1
    if mini == 0:
        # A single row or column: the normalising term is zero, so report
        # "undefined" explicitly instead of dividing 0 by 0.
        return np.nan
    # Yates' continuity correction (SciPy's default for 2x2 tables) is
    # disabled: Cramér's V is defined on the uncorrected statistic, and with
    # the correction a binary variable compared to itself would score < 1.
    stat = chi2_contingency(crosstab, correction=False)[0]
    obs = np.sum(crosstab)  # total number of observations
    # The square root turns the normalised chi-squared (V squared) into V.
    return np.sqrt(stat / (obs * mini))

def predict(file_obj):
    """Draw a heatmap of pairwise ``cramers_V`` scores for a CSV's text columns.

    Args:
        file_obj: The uploaded CSV. Either an object exposing the file path
            as ``.name`` or a plain path string; anything without a
            ``.name`` attribute is handed to ``pd.read_csv`` unchanged.

    Returns:
        The matplotlib figure holding the annotated heatmap.

    Raises:
        ValueError: If the CSV contains no object-dtype (text) columns, so
            there is nothing to correlate.
    """
    csv_source = getattr(file_obj, "name", file_obj)
    cat_df = pd.read_csv(csv_source).select_dtypes(include=['object'])
    if cat_df.shape[1] == 0:
        raise ValueError(
            "The uploaded CSV contains no categorical (text) columns to correlate."
        )

    names = cat_df.columns
    # One row per column: its score against every categorical column,
    # rounded to two decimals for display in the heatmap cells.
    matrix = [
        [round(cramers_V(cat_df[row], cat_df[col]), 2) for col in names]
        for row in names
    ]
    df_final = pd.DataFrame(np.array(matrix), columns=names, index=names)

    # Close the current figure first so the heatmap is drawn on a fresh one
    # rather than on top of whatever an earlier call left behind.
    plt.close()
    sns.heatmap(data=df_final, annot=True)
    return plt.gcf()
   
    
# Gradio front end: a file upload is passed to predict and the figure it
# returns is shown as a plot.
iface = gr.Interface(
    fn=predict,
    inputs="file",
    outputs="plot",
    examples=["StudentsPerformance.csv"],
    theme="dark-peach",
    title='Correlation Tool for Categorical features',
    description="This tool identifies and explains the correlation between categorical features.",
)
iface.launch(inline=False)