import pandas as pd
import ydata_profiling
import gradio as gr
from pydantic_settings import BaseSettings
from tempfile import NamedTemporaryFile
import sweetviz as sv
#from datatile.summary.df import DataFrameSummary
from autoviz.AutoViz_Class import AutoViz_Class
from traceml.summary.df import DataFrameSummary
def variable_table(df):
"""
Analyzes a DataFrame and categorizes variables with colorful HTML formatting.
Args:
df (pandas.DataFrame): The DataFrame to analyze.
Returns:
str: HTML code representing the analysis results with colorful highlights.
"""
# Analyze variable types
numerical_vars=[]
text_vars=[]
categorical_vars=[]
# Identify categorical, numerical, and text columns
# Iterate over columns
for col in df.columns:
print(col )
unique_values = df[col].nunique()
total_values = df[col].count()
dtype_kind = df[col].dtype.kind
print( 'unique value-',unique_values,' total_values ',total_values,dtype_kind)
# Check criteria for numerical, text, and categorical variables
if (dtype_kind == 'f' or dtype_kind == 'i') and unique_values >= total_values / 10:
numerical_vars.append(col)
print('Numerical')
elif (dtype_kind == 'O' or dtype_kind == 'b') and unique_values > total_values / 10:
text_vars.append(col)
print('Text')
else:
categorical_vars.append(col)
print('categorical')
print(categorical_vars,numerical_vars,text_vars)
# Build HTML table with styles
table_style = 'border: 1px solid #ddd; border-collapse: collapse; text-align: left; font-size: 14px;'
header_style = 'background-color: #f2f2f2; padding: 5px 10px;'
data_style = 'padding: 5px 10px; border-bottom: 1px solid #ddd;'
category_color = '#90ee90' # Light green for categorical
numerical_color = '#add8e6' # Light blue for numerical
text_color = '#ffd9b3' # Light yellow for text
html = f"
"
html += f"Variable Type | Columns |
"
# Add rows for each variable type with coloring
if categorical_vars:
html += f"Categorical | {', '.join(categorical_vars)} |
"
if numerical_vars:
html += f"Numerical | {', '.join(numerical_vars)} |
"
if text_vars:
html += f"Text | {', '.join(text_vars)} |
"
# Handle cases where no variables are found
if not (categorical_vars or numerical_vars or text_vars):
html += "No variables found! |
"
html += "
"
return html
def generate_report(file, type):
df = pd.read_csv(file) if file.name.endswith(".csv") else pd.read_excel(file)
pandas_html_report =ydata_profiling.ProfileReport(df).to_html()
temp_file1 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file1.write(pandas_html_report.encode('utf-8'))
temp_file1.close()
print('Pandas completed')
# dataprep_report = create_report(df)
# temp_file2 = NamedTemporaryFile(delete=False, suffix=".html")
# temp_file2.write(dataprep_report.encode('utf-8'))
# temp_file2.close()
def convert_mixed_to_str(df):
mixed_cols = df.select_dtypes(include=['object']).columns # Detect object dtype (usually mixed)
for col in mixed_cols:
df[col] = df[col].astype(str) # Convert mixed columns to strings
return df
df = convert_mixed_to_str(df.copy())
df.columns = df.columns.str.upper()
sweetviz_report = sv.analyze(df)
#sweetviz_report=sweetviz_report.show_html()
#print(type(sweetviz_report))
report=sweetviz_report.show_html( filepath='SWEETVIZ_REPORT.html',
open_browser=False,
layout='widescreen',
scale=None)
HTMLFileToBeOpened = open('SWEETVIZ_REPORT.html', "r")
# Reading the file and storing in a variable
contents = HTMLFileToBeOpened.read()
print('Pandas completed')
temp_file3 = NamedTemporaryFile(delete=False, suffix=".html")
temp_file3.write(contents.encode('utf-8'))
temp_file3.close()
try:
df=AV.AutoViz('/content/insurance.csv')
dft = AV.AutoViz(filename="", sep=',', dfte=df, header=0, verbose=1,
lowess=False,chart_format='html',max_rows_analyzed=150000,
max_cols_analyzed=30,save_plot_dir="plots_html")
dft = AV.AutoViz(filename='/content/insurance.csv', sep=',', header=0, verbose=2,
lowess=False,chart_format='png',max_rows_analyzed=150000,
max_cols_analyzed=30,save_plot_dir="plots")
except Exception as e:
dfviz=df
print(e)
print('Exception during Autoviz')
reports = []
output_dir_html='plots_html/AutoViz'
for filename in os.listdir(output_dir_html):
if filename.endswith(".html"):
title = os.path.splitext(filename)[0] # Extract title from filename (without extension)
reports.append(title)
reports.append(output_dir_html+'/'+filename)
imgs = []
output_dir_img='plots/Autoviz'
for filename in os.listdir(output_dir_img):
if filename.endswith(".png"):
title = os.path.splitext(filename)[0] # Extract title from filename (without extension)
imgs.append(title)
imgs.append(output_dir_img+'/'+filename)
while len(reports)<10:
# Comparing the lengths and appending t and m to output if needed
t = gr.Markdown(visible=False)
m = gr.File(visible=False)
reports.append(t)
reports.append(m)
while len(imgs)<10:
# Comparing the lengths and appending t and m to output if needed
t = gr.Markdown(visible=False)
m = gr.Image(visible=False)
imgs.append(t)
imgs.append(m)
dfs = DataFrameSummary(df)
sd=dfs.summary()
sd.index.name = 'PARAMETERS'
sd1= sd.reset_index(drop=False)
def highlight_cols(x):
df = x.copy()
df.loc[:, :] = 'color: purple'
df[['PARAMETERS']] = 'color: red'
return df
# Applying the style function
sd1 = sd1.style.apply(highlight_cols, axis = None)
sd1.columns = sd1.columns.str.upper()
return [temp_file1.name ,temp_file3.name ,variable_table(df),dfviz,sd1]+reports+imgs
def handle_click(file, sheet_name):
results = generate_report(file, sheet_name)
return results
def update_sheets(file):
sheet_names = pd.ExcelFile(file).sheet_names
sheet_dropdown = gr.Dropdown(
label="Select sheet", choices=[sheet_names], visible=True # Initially disabled
)
return sheet_dropdown
with gr.Blocks() as cluster:
with gr.Column():
gr.HTML(value="""EXCEL ANALYSIS AND INSIGHTS
""")
with gr.Row():
file=gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
btn=gr.Button(value="Download Report")
with gr.Row():
gr.HTML(value="""PANDAS REPORT
""")
out1=gr.File(label="Download CSV")
gr.HTML(value="""SWEETVIZ REPORT
""")
out2=gr.File(label="Download CSV")
sheet_dropdown = gr.Dropdown(
label="Select sheet", choices=[], visible=False # Initially disabled
)
with gr.Column():
gr.HTML(value="""""")
#gr.Markdown("***Uploaded File***")
var=gr.HTML()
with gr.Column():
gr.HTML(value="""""")
#gr.Markdown("#Uploaded File")
dataframe1=gr.Dataframe()
with gr.Column():
gr.HTML(value="""""")
#gr.Markdown("#Columns Analysis")
dataframe2=gr.Dataframe()
#Autoviz insights graphs
with gr.Column():
gr.HTML(value="""EXCEL ANALYSIS AND INSIGHTS
""")
with gr.Row():
file = gr.File(file_types=['.csv', '.xlsx'], label="Upload a CSV or Excel file")
btn = gr.Button("Download Report")
# AutoViz report display section
#autoviz_reports = gr.Column()
gr.HTML(value="""Insight Visualisations
""")
with gr.Column():
plots2 = []
for i in range(5):
t = gr.Markdown()
with gr.Column(scale=1):
m=gr.Image()
plots2.append(t)
plots2.append(m)
print(type(plots2[0]))
gr.HTML(value="""Insight graphs
""")
with gr.Row():
plots1 = []
for i in range(5):
with gr.Row():
t = gr.Markdown()
m=gr.File()
plots1.append(t)
plots1.append(m)
print(type(plots1[0]))
btn.click(handle_click, inputs=[file, sheet_dropdown], outputs=[out1, out2, var,dataframe1, dataframe2]+plots1+plots2)
file.change(update_sheets, inputs=file, outputs=sheet_dropdown)
#btn.click(generate_report,inputs=[file],outputs=[out1,out2,var,dataframe1,dataframe2])
cluster.launch()