# Hosting-page header: aitek230telu's picture
# Upload 52 files
# 0ab7b0c verified
#import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases removed the old names, so try the current name first and
# fall back to the legacy one for older Matplotlib versions.  If neither is
# available the default style stays active: styling is cosmetic and should
# not prevent this module from being imported.
for _style_name in ('seaborn-v0_8-colorblind', 'seaborn-colorblind'):
    try:
        plt.style.use(_style_name)
    except OSError:
        # An unknown style name raises OSError; try the next candidate.
        continue
    break
# 2018.11.07 Created by Eamon.Zhang
def get_dtypes(data, drop_col=None):
    """Split the columns of a pandas Dataframe into string and numeric lists.

    A column counts as numeric when its dtype kind is signed integer,
    unsigned integer or floating point.  Every other column (object, bool,
    datetime, category, ...) is reported as a string variable.

    Parameters
    ----------
    data : pandas Dataframe
    drop_col : columns to omit in a list (default: omit nothing)

    Returns
    -------
    str_var_list, num_var_list, all_var_list
        Non-numeric columns, numeric columns, and the concatenation of both
        (non-numeric first).  Each list keeps the Dataframe's column order.
    """
    # A None default avoids sharing one mutable list object between calls.
    omitted = set() if drop_col is None else set(drop_col)

    str_var_list = []
    num_var_list = []
    for var in data.columns:
        if var in omitted:
            continue
        # dtype.kind is 'i' / 'u' / 'f' for integer / unsigned / float data.
        # Checking the kind covers every width (int8 ... float64) and does
        # not depend on the np.int / np.float aliases, which were removed
        # from NumPy and would raise AttributeError here.
        if getattr(data[var].dtypes, 'kind', 'O') in ('i', 'u', 'f'):
            num_var_list.append(var)
        else:
            str_var_list.append(var)

    all_var_list = str_var_list + num_var_list
    return str_var_list, num_var_list, all_var_list
def describe(data, output_path=None):
    """Summarise a pandas Dataframe and optionally save the summary as csv.

    The summary is ``data.describe(include='all')``.  When ``output_path``
    is given, the summary is also written to ``describe.csv`` inside that
    directory and the file location is printed.

    Parameters
    ----------
    data : pandas Dataframe
    output_path : directory for the csv file, or None to skip saving

    Returns
    -------
    the summary returned by ``data.describe``
    """
    summary = data.describe(include='all')
    if output_path is None:
        return summary

    csv_file = os.path.join(output_path, 'describe.csv')
    summary.to_csv(csv_file)
    print('result saved at:', str(csv_file))
    return summary
def discrete_var_barplot(x, y, data, output_path=None):
    """Draw a barplot of discrete variable x against y (the target variable).

    The plot is produced by ``sns.barplot`` on a new 15x10 figure.

    Parameters
    ----------
    x : variable shown on the x axis, forwarded to ``sns.barplot``
    y : variable shown on the y axis, forwarded to ``sns.barplot``
    data : dataset forwarded to ``sns.barplot``
    output_path : directory for the PNG file, or None to skip saving

    Returns
    -------
    None; the figure is saved as ``Barplot_<x>_<y>.png`` when
    ``output_path`` is given
    """
    plt.figure(figsize=(15, 10))
    sns.barplot(x=x, y=y, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Barplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
def discrete_var_countplot(x, data, output_path=None):
    """Draw a countplot of discrete variable x.

    The plot is produced by ``sns.countplot`` on a new 15x10 figure.

    Parameters
    ----------
    x : variable to count, forwarded to ``sns.countplot``
    data : dataset forwarded to ``sns.countplot``
    output_path : directory for the PNG file, or None to skip saving

    Returns
    -------
    None; the figure is saved as ``Countplot_<x>.png`` when
    ``output_path`` is given
    """
    plt.figure(figsize=(15, 10))
    sns.countplot(x=x, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Countplot_', str(x), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
def discrete_var_boxplot(x, y, data, output_path=None):
    """Draw a boxplot of discrete variable x against y.

    The plot is produced by ``sns.boxplot`` on a new 15x10 figure.

    Parameters
    ----------
    x : variable shown on the x axis, forwarded to ``sns.boxplot``
    y : variable shown on the y axis, forwarded to ``sns.boxplot``
    data : dataset forwarded to ``sns.boxplot``
    output_path : directory for the PNG file, or None to skip saving

    Returns
    -------
    None; the figure is saved as ``Boxplot_<x>_<y>.png`` when
    ``output_path`` is given
    """
    plt.figure(figsize=(15, 10))
    sns.boxplot(x=x, y=y, data=data)
    if output_path is None:
        return

    file_name = ''.join(['Boxplot_', str(x), '_', str(y), '.png'])
    target = os.path.join(output_path, file_name)
    plt.savefig(target)
    print('Image saved at', str(target))
def continuous_var_distplot(x, output_path=None, bins=None):
    """Draw the distribution plot (histogram, no KDE) of a continuous variable.

    The plot is produced by ``sns.distplot`` on a new 15x10 figure.

    Parameters
    ----------
    x : observations to plot, typically a pandas Series; its ``name`` is
        used in the file name
    output_path : directory for the PNG file, or None to skip saving
    bins : histogram bins specification forwarded to ``sns.distplot``

    Returns
    -------
    None; the figure is saved as ``Distplot_<name>.png`` when
    ``output_path`` is given
    """
    plt.figure(figsize=(15, 10))
    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # it is kept here so the rendered figure stays unchanged.
    sns.distplot(a=x, kde=False, bins=bins)
    if output_path is not None:
        # Plain arrays and lists carry no ``name`` attribute.  Fall back to
        # None (the same value an unnamed Series reports) instead of
        # raising AttributeError after the figure has been drawn.
        var_name = getattr(x, 'name', None)
        output = os.path.join(output_path, 'Distplot_' + str(var_name) + '.png')
        plt.savefig(output)
        print('Image saved at', str(output))
# 2018.11.28 Created by Eamon.Zhang
def scatter_plot(x, y, data, output_path=None):
    """Draw the scatter plot of two variables.

    The plot is produced by ``sns.scatterplot`` on a new 15x10 figure.

    Parameters
    ----------
    x : variable for the x axis, forwarded to ``sns.scatterplot``; either a
        column name of ``data`` or a named vector such as a pandas Series
    y : variable for the y axis, same conventions as ``x``
    data : dataset forwarded to ``sns.scatterplot``
    output_path : directory for the PNG file, or None to skip saving

    Returns
    -------
    None; the figure is saved as ``Scatter_plot_<x>_<y>.png`` when
    ``output_path`` is given
    """
    plt.figure(figsize=(15, 10))
    sns.scatterplot(x=x, y=y, data=data)
    if output_path is not None:
        # x and y are usually column-name strings, which have no ``.name``.
        # Use the attribute when present (Series) and the value itself
        # otherwise, matching how the other plot helpers build file names.
        x_label = getattr(x, 'name', x)
        y_label = getattr(y, 'name', y)
        output = os.path.join(
            output_path,
            'Scatter_plot_' + str(x_label) + '_' + str(y_label) + '.png',
        )
        plt.savefig(output)
        print('Image saved at', str(output))
def correlation_plot(data, output_path=None):
    """Draw an annotated heatmap of the pairwise correlations in ``data``.

    Only numeric and boolean columns take part in the correlation; other
    columns are left out.

    Parameters
    ----------
    data : pandas Dataframe
    output_path : directory for the PNG file, or None to skip saving

    Returns
    -------
    None; the figure is saved as ``Corr_plot.png`` when ``output_path`` is
    given
    """
    # Newer pandas versions no longer drop non-numeric columns inside
    # corr() and raise on them instead, so pick the usable columns
    # explicitly before computing the matrix.
    numeric_data = data.select_dtypes(include=['number', 'bool'])
    corrmat = numeric_data.corr()

    fig, ax = plt.subplots()
    fig.set_size_inches(11, 11)
    # Draw on the axes created above rather than on whichever axes happen
    # to be current.
    sns.heatmap(corrmat, cmap="YlGnBu", linewidths=.5, annot=True, ax=ax)
    if output_path is not None:
        output = os.path.join(output_path, 'Corr_plot' + '.png')
        fig.savefig(output)
        print('Image saved at', str(output))
def heatmap(data, output_path=None, fmt='d'):
    """Draw an annotated heatmap of ``data``.

    The plot is produced by ``sns.heatmap`` on a new 11x11 inch figure.

    Parameters
    ----------
    data : rectangular dataset forwarded to ``sns.heatmap``
    output_path : directory for the PNG file, or None to skip saving
    fmt : format code for the cell annotations, forwarded to ``sns.heatmap``

    Returns
    -------
    None; the figure is saved as ``Heatmap.png`` when ``output_path`` is
    given
    """
    figure, _axes = plt.subplots()
    figure.set_size_inches(11, 11)
    sns.heatmap(
        data,
        cmap="YlGnBu",
        linewidths=.5,
        annot=True,
        fmt=fmt,
    )
    if output_path is None:
        return

    target = os.path.join(output_path, 'Heatmap' + '.png')
    plt.savefig(target)
    print('Image saved at', str(target))