Anupam251272 committed on
Commit
cf45e4a
·
verified ·
1 Parent(s): 4b2272f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +212 -0
app.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Install required packages
2
+ #!pip install gradio pandas numpy plotly scikit-learn matplotlib seaborn openpyxl
3
+
4
+ import gradio as gr
5
+ import pandas as pd
6
+ import numpy as np
7
+ import plotly.express as px
8
+ import plotly.graph_objects as go
9
+ from sklearn.preprocessing import StandardScaler
10
+ import io
11
+
12
class DataVisualizationPlatform:
    """Hold an uploaded dataset and build summaries and Plotly figures from it.

    Instance attributes (set in ``__init__``):
        df: the DataFrame from the last successful load, or ``None``.
        processed_df: imputed and scaled copy of ``df`` produced by
            ``preprocess_data``, or ``None``.
        scaler: the ``StandardScaler`` used by ``preprocess_data``.
    """

    def __init__(self):
        self.df = None
        self.processed_df = None
        self.scaler = StandardScaler()

    def _numeric_columns(self):
        """Return the labels of every numeric column in ``self.df``.

        ``"number"`` matches all numeric dtypes (int32, float32, unsigned, ...),
        not only ``int64``/``float64``.
        """
        return self.df.select_dtypes(include="number").columns

    def load_and_update_columns(self, file):
        """Load a CSV or Excel file and return the status plus column choices.

        Args:
            file: either a filesystem path (``str``) or an object exposing the
                path as ``.name``; ``None`` means nothing was uploaded.

        Returns:
            A dict with keys ``"status"`` (message for the user), ``"columns"``
            (list of column labels) and ``"columns_with_none"`` (the same list
            prefixed with ``"None"`` for optional selectors). On failure the
            column lists are empty / ``["None"]`` and ``self.df`` is unchanged.
        """
        path = file if isinstance(file, str) else getattr(file, "name", None)
        if not path:
            return {
                "status": "Please upload a file first",
                "columns": [],
                "columns_with_none": ["None"],
            }

        try:
            # Compare case-insensitively so "DATA.CSV" is still read as CSV.
            if str(path).lower().endswith(".csv"):
                loaded = pd.read_csv(path)
            else:
                loaded = pd.read_excel(path)
        except Exception as e:
            # UI boundary: report any reader failure as a status message
            # instead of propagating it to the caller.
            return {
                "status": f"Error loading data: {str(e)}",
                "columns": [],
                "columns_with_none": ["None"],
            }

        self.df = loaded
        # A previously preprocessed frame belongs to the old dataset.
        self.processed_df = None

        columns = list(self.df.columns)
        return {
            "status": f"Data loaded successfully. Shape: {self.df.shape}",
            "columns": columns,
            # "None" lets optional selectors (e.g. colour) be left unset.
            "columns_with_none": ["None"] + columns,
        }

    def preprocess_data(self):
        """Fill missing numeric values with the column mean, then standardise.

        The result is stored in ``self.processed_df``; ``self.df`` is not
        modified. Non-numeric columns are copied through unchanged.

        Returns:
            A status message describing success or the error encountered.
        """
        if self.df is None:
            return "Please load data first"

        try:
            processed = self.df.copy()
            numeric_cols = self._numeric_columns()

            if len(numeric_cols) == 0:
                # Nothing to impute or scale; fitting the scaler on zero
                # features would raise, so stop here with the plain copy.
                self.processed_df = processed
                return (
                    "Data preprocessing completed successfully "
                    "(no numeric columns to scale)"
                )

            # Handle missing values
            numeric_part = processed[numeric_cols]
            processed[numeric_cols] = numeric_part.fillna(numeric_part.mean())

            # Scale numeric features
            processed[numeric_cols] = self.scaler.fit_transform(
                processed[numeric_cols]
            )

            self.processed_df = processed
            return "Data preprocessing completed successfully"
        except Exception as e:
            return f"Error during preprocessing: {str(e)}"

    def generate_summary(self):
        """Return a text report with the shape, ``info()`` and ``describe()``.

        Returns:
            The report, or a message asking for data / describing the error.
        """
        if self.df is None:
            return "Please load data first"

        try:
            # DataFrame.info() writes to a stream, so capture it in memory.
            buffer = io.StringIO()
            self.df.info(buf=buffer)
            info_str = buffer.getvalue()

            # Joined line by line so the layout does not depend on how this
            # source file happens to be indented.
            return "\n".join(
                [
                    "Dataset Summary:",
                    "----------------",
                    f"Shape: {self.df.shape}",
                    "",
                    "Data Info:",
                    info_str,
                    "",
                    "Basic Statistics:",
                    self.df.describe().to_string(),
                ]
            )
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def create_correlation_heatmap(self):
        """Create a correlation heatmap over the numeric columns.

        Returns:
            A Plotly figure, or ``None`` when no data is loaded, there are no
            numeric columns, or figure creation fails (the error is printed).
        """
        if self.df is None:
            return None

        try:
            numeric_cols = self._numeric_columns()
            if len(numeric_cols) == 0:
                return None

            corr = self.df[numeric_cols].corr()
            return px.imshow(
                corr,
                labels=dict(color="Correlation"),
                title="Correlation Heatmap",
            )
        except Exception as e:
            print(f"Error creating heatmap: {str(e)}")
            return None

    def create_scatter_plot(self, x_col, y_col, color_col):
        """Create an interactive scatter plot of ``y_col`` against ``x_col``.

        Args:
            x_col: column for the x axis.
            y_col: column for the y axis.
            color_col: column used to colour the points; the string ``"None"``
                or an empty value disables colouring.

        Returns:
            A Plotly figure, or ``None`` when data or a required column is
            missing or figure creation fails (the error is printed).
        """
        if self.df is None or not x_col or not y_col:
            return None

        try:
            # NOTE: the "None" sentinel means a real column literally named
            # "None" cannot be chosen as the colour column.
            if not color_col or color_col == "None":
                color_col = None

            return px.scatter(
                self.df,
                x=x_col,
                y=y_col,
                color=color_col,
                title=f"Scatter Plot: {x_col} vs {y_col}",
            )
        except Exception as e:
            print(f"Error creating scatter plot: {str(e)}")
            return None

    def create_time_series(self, date_col, value_col):
        """Create a line plot of ``value_col`` over ``date_col``.

        Rows are plotted in their existing order; no date parsing or sorting
        is done here.

        Returns:
            A Plotly figure, or ``None`` when data or a column is missing or
            figure creation fails (the error is printed).
        """
        if self.df is None or not date_col or not value_col:
            return None

        try:
            return px.line(
                self.df,
                x=date_col,
                y=value_col,
                title=f"Time Series: {value_col} over {date_col}",
            )
        except Exception as e:
            print(f"Error creating time series: {str(e)}")
            return None
132
+
133
def create_visualization_interface():
    """Build the Gradio Blocks UI and wire it to a DataVisualizationPlatform.

    The UI has two tabs: one for loading, preprocessing and summarising a
    dataset, and one with a correlation heatmap, a scatter plot and a time
    series plot. A single platform instance created here backs every handler.

    Returns:
        The ``gr.Blocks`` interface; the caller is responsible for launching it.
    """
    dvp = DataVisualizationPlatform()

    with gr.Blocks(title="Data Visualization Platform") as interface:
        gr.Markdown("# Interactive Data Visualization Platform")

        with gr.Tab("Data Loading & Preprocessing"):
            file_input = gr.File(label="Upload CSV or Excel file")
            load_btn = gr.Button("Load Data")
            load_output = gr.Textbox(label="Loading Status")
            preprocess_btn = gr.Button("Preprocess Data")
            preprocess_output = gr.Textbox(label="Preprocessing Status")
            summary_btn = gr.Button("Generate Summary")
            summary_output = gr.Textbox(label="Data Summary", lines=10)

        with gr.Tab("Visualizations"):
            with gr.Row():
                with gr.Column():
                    # Correlation Heatmap
                    heatmap_btn = gr.Button("Generate Correlation Heatmap")
                    heatmap_plot = gr.Plot(label="Correlation Heatmap")

                with gr.Column():
                    # Scatter Plot
                    x_col = gr.Dropdown(label="X Column", choices=[])
                    y_col = gr.Dropdown(label="Y Column", choices=[])
                    color_col = gr.Dropdown(
                        label="Color Column (optional)",
                        choices=["None"],
                        value="None",
                    )
                    scatter_btn = gr.Button("Generate Scatter Plot")
                    scatter_plot = gr.Plot(label="Scatter Plot")

            with gr.Row():
                # Time Series
                date_col = gr.Dropdown(label="Date Column", choices=[])
                value_col = gr.Dropdown(label="Value Column", choices=[])
                timeseries_btn = gr.Button("Generate Time Series")
                timeseries_plot = gr.Plot(label="Time Series Plot")

        def update_interface(file):
            """Load ``file`` and refresh the status box and column dropdowns."""
            result = dvp.load_and_update_columns(file)
            columns = result["columns"]
            # value is reset explicitly: a selection left over from a previous
            # file may name a column that no longer exists.
            return {
                load_output: result["status"],
                x_col: gr.Dropdown(choices=columns, value=None),
                y_col: gr.Dropdown(choices=columns, value=None),
                color_col: gr.Dropdown(
                    choices=result["columns_with_none"], value="None"
                ),
                date_col: gr.Dropdown(choices=columns, value=None),
                value_col: gr.Dropdown(choices=columns, value=None),
            }

        # Event handlers
        load_btn.click(
            fn=update_interface,
            inputs=[file_input],
            outputs=[load_output, x_col, y_col, color_col, date_col, value_col],
        )

        preprocess_btn.click(fn=dvp.preprocess_data, outputs=preprocess_output)
        summary_btn.click(fn=dvp.generate_summary, outputs=summary_output)
        heatmap_btn.click(fn=dvp.create_correlation_heatmap, outputs=heatmap_plot)
        scatter_btn.click(
            fn=dvp.create_scatter_plot,
            inputs=[x_col, y_col, color_col],
            outputs=scatter_plot,
        )
        timeseries_btn.click(
            fn=dvp.create_time_series,
            inputs=[date_col, value_col],
            outputs=timeseries_plot,
        )

    return interface
209
+
210
# Build the interface at module level so `demo` remains importable by name.
demo = create_visualization_interface()

# Start the server only when this file is executed as a script, so that
# importing the module does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()