datasciencedojo's picture
Update app.py
36d3d07
raw
history blame
No virus
4.38 kB
import gradio as gr
import pandas as pd
from sklearn import datasets
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
def _regression_figure(df, feature, target):
    """Return a new figure holding a regression plot of ``feature`` vs ``target``.

    A blank figure is returned when ``feature`` is ``None`` (fewer correlated
    features than plot slots) or when plotting fails, so the caller always
    has a figure to hand to the UI.
    """
    fig = plt.figure()
    if feature is None:
        return fig
    try:
        sns.regplot(x=df[feature], y=df[target])
    except Exception:
        # Best-effort plot: discard the partially drawn figure and fall back
        # to an empty one rather than failing the whole request.
        plt.close(fig)
        fig = plt.figure()
    return fig


def findCorrelation(dataset, target):
    """Compute correlations between ``target`` and the other numeric columns.

    Args:
        dataset: Object whose ``name`` attribute is the path of a CSV file.
        target: Name of the column to correlate against. A non-numeric
            target is label-encoded to integers first.

    Returns:
        A 5-tuple ``(d, fig1, fig2, fig3, fig4)`` where ``d`` maps each other
        numeric column name to its correlation with ``target``, ``fig1`` is a
        heatmap of the full correlation matrix, and ``fig2``..``fig4`` are
        regression plots of the three features most strongly correlated
        (by absolute value) with ``target``. Missing slots are blank figures.

    Raises:
        KeyError: If ``target`` is not a column of the dataset.
    """
    df = pd.read_csv(dataset.name)

    if target not in df.columns:
        raise KeyError(f"Target column {target!r} not found in the dataset")

    # Encode a non-numeric target in place so it can take part in the
    # correlation (the original wrote to an undefined column name here).
    if not pd.api.types.is_numeric_dtype(df[target]):
        df[target] = LabelEncoder().fit_transform(df[target])

    # Restrict to numeric/bool columns explicitly: DataFrame.corr() raises on
    # string columns in recent pandas versions.
    corr_matrix = df.select_dtypes(include=["number", "bool"]).corr()
    target_corr = corr_matrix[target].drop(labels=target)
    d = target_corr.to_dict()

    # Rank by correlation strength, ignoring sign; NaN correlations
    # (e.g. constant columns) cannot be ranked and are skipped.
    ranked = target_corr.abs().dropna().sort_values(ascending=False)
    top_features = ranked.index.tolist()[:3]
    top_features += [None] * (3 - len(top_features))

    fig1 = plt.figure()
    hm = sns.heatmap(corr_matrix, annot=True)
    hm.set(title="Correlation matrix of dataset\n")

    fig2, fig3, fig4 = (
        _regression_figure(df, feature, target) for feature in top_features
    )
    return d, fig1, fig2, fig3, fig4
# Custom stylesheet for the page: hides the footer and ".output-markdown"
# elements, constrains the label panel height, restyles the large button,
# and overrides the orange theme utility classes with a navy palette.
# A raw string is required because the selectors contain CSS escapes
# (\[ \] \:) that are not valid Python escape sequences.
css = r"""
footer {display:none !important}
.output-markdown{display:none !important}
div[data-testid="label"] {height: 18rem !important; overflow-x : hidden !important; overflow-y: scroll !important;}
.max-h-\[30rem\] {max-height: 18rem !important;}
.gr-button-lg {
z-index: 14;
width: 113px;
height: 30px;
left: 0px;
top: 0px;
padding: 0px;
cursor: pointer !important;
background: none rgb(17, 20, 45) !important;
border: none !important;
text-align: center !important;
font-size: 14px !important;
font-weight: 500 !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: none !important;
}
.gr-button-lg:hover{
z-index: 14;
width: 113px;
height: 30px;
left: 0px;
top: 0px;
padding: 0px;
cursor: pointer !important;
background: none rgb(37, 56, 133) !important;
border: none !important;
text-align: center !important;
font-size: 14px !important;
font-weight: 500 !important;
color: rgb(255, 255, 255) !important;
line-height: 1 !important;
border-radius: 6px !important;
transition: box-shadow 200ms ease 0s, background 200ms ease 0s !important;
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
}
.hover\:bg-orange-50:hover {
--tw-bg-opacity: 1 !important;
background-color: rgb(229,225,255) !important;
}
.to-orange-200 {
--tw-gradient-to: rgb(37 56 133 / 37%) !important;
}
.from-orange-400 {
--tw-gradient-from: rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(255 150 51 / 0);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group-hover\:from-orange-500{
--tw-gradient-from:rgb(17, 20, 45) !important;
--tw-gradient-to: rgb(37 56 133 / 37%);
--tw-gradient-stops: var(--tw-gradient-from), var(--tw-gradient-to) !important;
}
.group:hover .group-hover\:text-orange-500{
--tw-text-opacity: 1 !important;
color:rgb(37 56 133 / var(--tw-text-opacity)) !important;
}
"""
# Page layout: file upload and target-name input on top, followed by the
# correlation label panel, the heatmap, and the three regression plots.
# NOTE(review): the nesting of rows/columns is reconstructed from a source
# whose indentation was lost -- in particular the button's placement inside
# the second column is an assumption; confirm against the deployed layout.
with gr.Blocks(css=css, title="Find Correlation | Data Science Dojo") as demo:
    with gr.Row():
        with gr.Column():
            file = gr.File()
        with gr.Column():
            inp = gr.Textbox(
                placeholder="Enter the target feature name",
                label="Target Variable",
            )
            btn = gr.Button("Find correlation")

    gr.Markdown("\n## Correlation with other numeric features\n")
    with gr.Row():
        labels = gr.Label(num_top_classes=10)

    gr.Markdown("\n## HeatMap\n")
    with gr.Row():
        fig1 = gr.Plot()

    gr.Markdown("\n## Plot of top 3 correlated features\n")
    with gr.Row():
        with gr.Column():
            fig2 = gr.Plot()
        with gr.Column():
            fig3 = gr.Plot()
    with gr.Row():
        fig4 = gr.Plot()

    # The example runner and the button share the same handler wiring.
    handler_inputs = [file, inp]
    handler_outputs = [labels, fig1, fig2, fig3, fig4]

    with gr.Row():
        gr.Examples(
            examples=[["boston.csv", "MEDV"]],
            fn=findCorrelation,
            inputs=handler_inputs,
            outputs=handler_outputs,
            cache_examples=True,
        )

    btn.click(
        fn=findCorrelation,
        inputs=handler_inputs,
        outputs=handler_outputs,
    )

demo.launch()