DrishtiSharma committed on
Commit
964d389
·
verified ·
1 Parent(s): 252408a

Update dummy_funcs.py

Browse files
Files changed (1) hide show
  1. dummy_funcs.py +180 -15
dummy_funcs.py CHANGED
@@ -1,12 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  def add_stats_to_figure(fig, df, y_axis, chart_type):
2
- # Calculate statistics
 
 
 
 
 
 
 
 
 
3
  min_val = df[y_axis].min()
4
  max_val = df[y_axis].max()
5
  avg_val = df[y_axis].mean()
6
  median_val = df[y_axis].median()
7
  std_dev_val = df[y_axis].std()
8
 
9
- # Stats summary text
10
  stats_text = (
11
  f"๐Ÿ“Š **Statistics**\n\n"
12
  f"- **Min:** ${min_val:,.2f}\n"
@@ -16,36 +48,169 @@ def add_stats_to_figure(fig, df, y_axis, chart_type):
16
  f"- **Std Dev:** ${std_dev_val:,.2f}"
17
  )
18
 
19
- # Charts suitable for stats annotations
20
- if chart_type in ["bar", "line", "scatter"]:
21
- # Add annotation box
22
  fig.add_annotation(
23
  text=stats_text,
24
  xref="paper", yref="paper",
25
- x=1.05, y=1,
26
  showarrow=False,
27
  align="left",
28
  font=dict(size=12, color="black"),
29
- bordercolor="black",
30
  borderwidth=1,
31
- bgcolor="rgba(255, 255, 255, 0.8)"
32
  )
33
 
34
- # Add horizontal lines for min, median, avg, max
35
  fig.add_hline(y=min_val, line_dash="dot", line_color="red", annotation_text="Min", annotation_position="bottom right")
36
  fig.add_hline(y=median_val, line_dash="dash", line_color="orange", annotation_text="Median", annotation_position="top right")
37
  fig.add_hline(y=avg_val, line_dash="dashdot", line_color="green", annotation_text="Avg", annotation_position="top right")
38
  fig.add_hline(y=max_val, line_dash="dot", line_color="blue", annotation_text="Max", annotation_position="top right")
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  elif chart_type == "box":
41
- # Box plots already show distribution (no extra stats needed)
42
  pass
43
 
44
  elif chart_type == "pie":
45
- # Pie charts don't need statistical overlays
46
- st.info("๐Ÿ“Š Pie charts focus on proportions. No additional stats displayed.")
47
-
 
 
 
 
48
  else:
49
- st.warning(f"โš ๏ธ No stats added for unsupported chart type: {chart_type}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- return fig
 
 
 
1
def _strip_code_fence(text):
    """Return *text* without a surrounding Markdown ``` fence, if present."""
    cleaned = text.strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[3:]
        # Drop an optional language tag such as ```json.
        if cleaned.lower().startswith("json"):
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
    return cleaned.strip()


def ask_gpt4o_for_visualization(query, df, llm):
    """Ask the LLM to suggest visualizations for *query* over *df*'s columns.

    Builds a prompt that lists the available column names and the expected
    JSON response shape, sends it through ``llm.generate(prompt)`` and decodes
    the reply as JSON.

    Args:
        query: Natural-language question to visualize.
        df: Object exposing ``.columns`` (presumably a pandas DataFrame).
        llm: Client exposing ``generate(prompt)``. Assumed to return the reply
            text as a string -- TODO confirm against the actual client.

    Returns:
        The decoded JSON value (the prompt asks for a list of suggestion
        dicts), or ``None`` when the reply cannot be parsed.
    """
    # Local import: no import block is visible at the top of this module.
    import json

    # str() each label so non-string column names do not break the join.
    columns = ', '.join(map(str, df.columns))
    prompt = f"""
    Analyze the query and suggest one or more relevant visualizations.
    Query: "{query}"
    Available Columns: {columns}
    Respond in this JSON format (as a list if multiple suggestions):
    [
        {{
            "chart_type": "bar/box/line/scatter",
            "x_axis": "column_name",
            "y_axis": "column_name",
            "group_by": "optional_column_name"
        }}
    ]
    """
    response = llm.generate(prompt)
    try:
        # Chat models frequently wrap JSON in a ```json fence; valid JSON never
        # starts with a backtick, so stripping the fence cannot hurt.
        if isinstance(response, str):
            response = _strip_code_fence(response)
        return json.loads(response)
    except (json.JSONDecodeError, TypeError):
        # TypeError covers replies that are not str/bytes at all.
        st.error("⚠️ GPT-4o failed to generate a valid suggestion.")
        return None
23
+
24
  def add_stats_to_figure(fig, df, y_axis, chart_type):
25
+ """
26
+ Add relevant statistical annotations to the visualization
27
+ based on the chart type.
28
+ """
29
+ # Check if the y-axis column is numeric
30
+ if not pd.api.types.is_numeric_dtype(df[y_axis]):
31
+ st.warning(f"โš ๏ธ Cannot compute statistics for non-numeric column: {y_axis}")
32
+ return fig
33
+
34
+ # Compute statistics for numeric data
35
  min_val = df[y_axis].min()
36
  max_val = df[y_axis].max()
37
  avg_val = df[y_axis].mean()
38
  median_val = df[y_axis].median()
39
  std_dev_val = df[y_axis].std()
40
 
41
+ # Format the stats for display
42
  stats_text = (
43
  f"๐Ÿ“Š **Statistics**\n\n"
44
  f"- **Min:** ${min_val:,.2f}\n"
 
48
  f"- **Std Dev:** ${std_dev_val:,.2f}"
49
  )
50
 
51
+ # Apply stats only to relevant chart types
52
+ if chart_type in ["bar", "line"]:
53
+ # Add annotation box for bar and line charts
54
  fig.add_annotation(
55
  text=stats_text,
56
  xref="paper", yref="paper",
57
+ x=1.02, y=1,
58
  showarrow=False,
59
  align="left",
60
  font=dict(size=12, color="black"),
61
+ bordercolor="gray",
62
  borderwidth=1,
63
+ bgcolor="rgba(255, 255, 255, 0.85)"
64
  )
65
 
66
+ # Add horizontal reference lines
67
  fig.add_hline(y=min_val, line_dash="dot", line_color="red", annotation_text="Min", annotation_position="bottom right")
68
  fig.add_hline(y=median_val, line_dash="dash", line_color="orange", annotation_text="Median", annotation_position="top right")
69
  fig.add_hline(y=avg_val, line_dash="dashdot", line_color="green", annotation_text="Avg", annotation_position="top right")
70
  fig.add_hline(y=max_val, line_dash="dot", line_color="blue", annotation_text="Max", annotation_position="top right")
71
 
72
+ elif chart_type == "scatter":
73
+ # Add stats annotation only, no lines for scatter plots
74
+ fig.add_annotation(
75
+ text=stats_text,
76
+ xref="paper", yref="paper",
77
+ x=1.02, y=1,
78
+ showarrow=False,
79
+ align="left",
80
+ font=dict(size=12, color="black"),
81
+ bordercolor="gray",
82
+ borderwidth=1,
83
+ bgcolor="rgba(255, 255, 255, 0.85)"
84
+ )
85
+
86
  elif chart_type == "box":
87
+ # Box plots inherently show distribution; no extra stats needed
88
  pass
89
 
90
  elif chart_type == "pie":
91
+ # Pie charts represent proportions, not suitable for stats
92
+ st.info("๐Ÿ“Š Pie charts represent proportions. Additional stats are not applicable.")
93
+
94
+ elif chart_type == "heatmap":
95
+ # Heatmaps already reflect data intensity
96
+ st.info("๐Ÿ“Š Heatmaps inherently reflect distribution. No additional stats added.")
97
+
98
  else:
99
+ st.warning(f"โš ๏ธ No statistical overlays applied for unsupported chart type: '{chart_type}'.")
100
+
101
+ return fig
102
+
103
+
104
# Dynamically generate Plotly visualizations based on GPT-4o suggestions
def _infer_y_axis(df, x_axis):
    """Pick a numeric column of *df* (other than *x_axis*) to plot on Y."""
    candidates = [
        col for col in df.select_dtypes(include='number').columns
        if col != x_axis  # avoid using the same column for both axes
    ]
    # Prefer well-known metric columns when they exist.
    for preferred in ("salary_in_usd", "income", "earnings", "revenue"):
        if preferred in candidates:
            return preferred
    # Otherwise fall back to the first numeric column, if any.
    return candidates[0] if candidates else None


def _axis_label(name):
    """Turn a column label such as ``salary_in_usd`` into ``Salary In Usd``."""
    return str(name).replace('_', ' ').title()


def generate_visualization(suggestion, df):
    """Build one Plotly figure from a single suggestion dict.

    Reads ``chart_type`` (default ``"bar"``), ``x_axis``, ``y_axis`` and
    ``group_by`` from *suggestion*. When ``y_axis`` is missing it is inferred
    from the numeric columns of *df*. The plotting function is looked up by
    name on ``px`` (presumably ``plotly.express``; the import is not visible
    in this file), and the finished figure is passed through
    ``add_stats_to_figure``.

    Args:
        suggestion: Mapping with the keys described above.
        df: DataFrame handed to the plotting function.

    Returns:
        The figure, or ``None`` when the axes cannot be determined, the chart
        type is unsupported, or plotting raises. Problems are reported through
        ``st.warning`` / ``st.error``.
    """
    # `or "bar"` also covers an explicit JSON null, which would otherwise
    # crash on `.lower()`.
    chart_type = str(suggestion.get("chart_type") or "bar").strip().lower()
    x_axis = suggestion.get("x_axis")
    y_axis = suggestion.get("y_axis")
    group_by = suggestion.get("group_by")

    # Step 1: Infer Y-axis if not provided
    if not y_axis:
        y_axis = _infer_y_axis(df, x_axis)

    # Step 2: Validate axes
    if not x_axis or not y_axis:
        st.warning("⚠️ Unable to determine appropriate columns for visualization.")
        return None

    # Step 3: Dynamically select the Plotly function. The name comes from the
    # model, so accept only public callables (not submodules or private names).
    plotly_function = None
    if not chart_type.startswith("_"):
        plotly_function = getattr(px, chart_type, None)
    if not callable(plotly_function):
        st.warning(f"⚠️ Unsupported chart type '{chart_type}' suggested by GPT-4o.")
        return None

    # Step 4: Prepare dynamic plot arguments
    plot_args = {"data_frame": df, "x": x_axis, "y": y_axis}
    if group_by and group_by in df.columns:
        plot_args["color"] = group_by

    try:
        # Step 5: Generate the visualization
        fig = plotly_function(**plot_args)
        x_label = _axis_label(x_axis)
        y_label = _axis_label(y_axis)
        fig.update_layout(
            title=f"{chart_type.title()} Plot of {y_label} by {x_label}",
            xaxis_title=x_label,
            yaxis_title=y_label,
        )

        # Step 6: Apply statistics. This previously called the undefined name
        # `add_statistics_to_visualization`; the resulting NameError was
        # swallowed by the handler below, so every call returned None.
        return add_stats_to_figure(fig, df, y_axis, chart_type)

    except Exception as e:
        # UI boundary: report any plotting failure instead of crashing the app.
        st.error(f"⚠️ Failed to generate visualization: {e}")
        return None
167
+
168
+
169
def generate_multiple_visualizations(suggestions, df):
    """Generate a figure for every usable suggestion.

    Each suggestion is passed to ``generate_visualization``, which already
    applies the statistics overlay as its final step. This function therefore
    does not overlay statistics again, and it does not index
    ``suggestion["y_axis"]`` / ``suggestion["chart_type"]``, which are absent
    when those values were inferred or defaulted.

    Args:
        suggestions: A list of suggestion dicts. A single dict is treated as a
            one-element list; ``None`` or an empty list yields no figures.
        df: DataFrame forwarded to ``generate_visualization``.

    Returns:
        A list of the figures that could be generated (never contains
        ``None``); empty when nothing could be generated.
    """
    if not suggestions:
        return []
    if isinstance(suggestions, dict):
        suggestions = [suggestions]

    attempts = (generate_visualization(suggestion, df) for suggestion in suggestions)
    visualizations = [fig for fig in attempts if fig is not None]

    if not visualizations:
        # Retrying the first suggestion would fail in exactly the same way, so
        # report the failure instead of appending a None figure.
        st.warning("⚠️ No valid visualization could be generated from the suggestions.")

    return visualizations
191
+
192
+
193
def handle_visualization_suggestions(suggestions, df):
    """Generate and display the visualization(s) described by *suggestions*.

    Args:
        suggestions: A single suggestion dict, or a list of them. Anything
            else (including ``None`` or an empty list) produces only a
            warning.
        df: DataFrame forwarded to the figure generators.

    Returns:
        None. Each figure is rendered with ``st.plotly_chart``; a warning is
        shown through ``st.warning`` when no figure could be generated.
    """
    # Treat a lone dict like a one-element list so both shapes share a path.
    if isinstance(suggestions, dict):
        suggestions = [suggestions]

    visualizations = []
    if isinstance(suggestions, list):
        if len(suggestions) > 1:
            # If multiple suggestions, generate multiple plots
            visualizations = generate_multiple_visualizations(suggestions, df)
        elif suggestions:
            # If only one suggestion, generate a single plot
            visualizations = [generate_visualization(suggestions[0], df)]

    # Never hand a failed (None) figure to the renderer.
    visualizations = [fig for fig in visualizations if fig is not None]

    # Handle cases when no visualization could be generated
    if not visualizations:
        st.warning("⚠️ Unable to generate any visualization based on the suggestion.")

    # Display all generated visualizations
    for fig in visualizations:
        st.plotly_chart(fig, use_container_width=True)