Spaces:

MicroHealth
/

autodata-visualizer

Paused

App Files Files Community

bluenevus committed 23 days ago

Commit

b7ede47

verified ·

1 Parent(s): 3ffb96a

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -86

app.py CHANGED Viewed

@@ -1,24 +1,78 @@
-import gradio as gr
-import pandas as pd
-import matplotlib.pyplot as plt
 import io
 import ast
-from PIL import Image, ImageDraw
-import google.generativeai as genai
 import traceback
-import os
-def process_file(file, instructions):
     try:
         # Initialize Gemini
         api_key = os.environ.get('GEMINI_API_KEY')
         genai.configure(api_key=api_key)
         model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
-        # Read uploaded file
-        file_path = file.name
-        df = pd.read_csv(file_path) if file_path.endswith('.csv') else pd.read_excel(file_path)
         # Generate visualization code
         response = model.generate_content(f"""
             Analyze the following dataset and instructions:
@@ -50,86 +104,64 @@ def process_file(file, instructions):
         elif '```' in code_block:
             code_block = code_block.split('```')[1].strip()
-        print("Generated code block:")
-        print(code_block)
         plots = ast.literal_eval(code_block)
-        # Generate visualizations
-        images = []
-        for plot in plots[:3]:  # Ensure max 3 plots
-            fig, ax = plt.subplots(figsize=(10, 6))
-            # Apply preprocessing and aggregation
-            plot_df = df.copy()
-            if plot['agg_func'] == 'sum':
-                plot_df = plot_df.groupby(plot['x'])[plot['y']].sum().reset_index()
-            elif plot['agg_func'] == 'mean':
-                plot_df = plot_df.groupby(plot['x'])[plot['y']].mean().reset_index()
-            elif plot['agg_func'] == 'count':
-                plot_df = plot_df.groupby(plot['x']).size().reset_index(name=plot['y'])
-            if 'top_n' in plot and plot['top_n']:
-                plot_df = plot_df.nlargest(plot['top_n'], plot['y'])
-            if plot['plot_type'] == 'bar':
-                plot_df.plot(kind='bar', x=plot['x'], y=plot['y'], ax=ax)
-            elif plot['plot_type'] == 'line':
-                plot_df.plot(kind='line', x=plot['x'], y=plot['y'], ax=ax)
-            elif plot['plot_type'] == 'scatter':
-                plot_df.plot(kind='scatter', x=plot['x'], y=plot['y'], ax=ax,
-                             c=plot['additional'].get('color'), s=plot_df[plot['additional'].get('size', 'y')])
-            elif plot['plot_type'] == 'hist':
-                plot_df[plot['x']].hist(ax=ax, bins=20)
-            elif plot['plot_type'] == 'pie':
-                plot_df.plot(kind='pie', y=plot['y'], labels=plot_df[plot['x']], ax=ax, autopct='%1.1f%%')
-            elif plot['plot_type'] == 'heatmap':
-                pivot_df = plot_df.pivot(index=plot['x'], columns=plot['additional']['color'], values=plot['y'])
-                ax.imshow(pivot_df, cmap='YlOrRd')
-                ax.set_xticks(range(len(pivot_df.columns)))
-                ax.set_yticks(range(len(pivot_df.index)))
-                ax.set_xticklabels(pivot_df.columns)
-                ax.set_yticklabels(pivot_df.index)
-            ax.set_title(plot['title'])
-            if plot['plot_type'] != 'pie':
-                ax.set_xlabel(plot['x'])
-                ax.set_ylabel(plot['y'])
-            plt.tight_layout()
-            buf = io.BytesIO()
-            plt.savefig(buf, format='png')
-            buf.seek(0)
-            img = Image.open(buf)
-            images.append(img)
-            plt.close(fig)
-        return images if len(images) == 3 else images + [Image.new('RGB', (800, 600), (255,255,255))]*(3-len(images))
     except Exception as e:
-        error_message = f"Error: {str(e)}\n\nTraceback:\n{traceback.format_exc()}"
-        print(error_message)  # Print to console for debugging
-        error_image = Image.new('RGB', (800, 400), (255, 255, 255))
-        draw = ImageDraw.Draw(error_image)
-        draw.text((10, 10), error_message, fill=(255, 0, 0))
-        return [error_image] * 3
-with gr.Blocks(theme=gr.themes.Default()) as demo:
-    gr.Markdown("# Data Analysis Dashboard")
-    with gr.Row():
-        file = gr.File(label="Upload Dataset", file_types=[".csv", ".xlsx"])
-        instructions = gr.Textbox(label="Analysis Instructions", placeholder="Describe the analysis you want...")
-    submit = gr.Button("Generate Insights", variant="primary")
-    output_images = [gr.Image(label=f"Visualization {i+1}") for i in range(3)]
-    submit.click(
-        process_file,
-        inputs=[file, instructions],
-        outputs=output_images
-    )
-if __name__ == "__main__":
-    demo.launch()

+import base64
 import io
+import os
 import ast
 import traceback
+from threading import Thread
+import dash
+from dash import dcc, html, Input, Output, State
+import dash_bootstrap_components as dbc
+import pandas as pd
+import plotly.graph_objs as go
+import google.generativeai as genai
+# Dash application styled with the Bootstrap theme
+app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
+
+# Inline CSS for the dashed drag-and-drop upload area
+_upload_style = {
+    'width': '100%',
+    'height': '60px',
+    'lineHeight': '60px',
+    'borderWidth': '1px',
+    'borderStyle': 'dashed',
+    'borderRadius': '5px',
+    'textAlign': 'center',
+    'margin': '10px',
+}
+
+# Single-file upload control
+_upload_control = dcc.Upload(
+    id='upload-data',
+    children=html.Div(['Drag and Drop or ', html.A('Select Files')]),
+    style=_upload_style,
+    multiple=False,
+)
+
+# Input card: file upload, free-text instructions and the submit button
+_input_card = dbc.Card(
+    [
+        dbc.CardBody([
+            _upload_control,
+            dbc.Input(id="instructions", placeholder="Describe the analysis you want...", type="text"),
+            dbc.Button("Generate Insights", id="submit-button", color="primary", className="mt-3"),
+        ])
+    ],
+    className="mb-4",
+)
+
+# Output card: three graph slots with ids visualization-1 .. visualization-3
+_output_card = dbc.Card([
+    dbc.CardBody([dcc.Graph(id=f'visualization-{slot}') for slot in range(1, 4)])
+])
+
+# Page layout: heading, input card, output card
+app.layout = dbc.Container(
+    [
+        html.H1("Data Analysis Dashboard", className="my-4"),
+        _input_card,
+        _output_card,
+    ],
+    fluid=True,
+)
+def parse_contents(contents, filename):
+    """Decode an upload payload into a pandas DataFrame.
+
+    Args:
+        contents: Upload payload as a string of the form
+            ``"<content type>,<base64 data>"``.
+        filename: Name of the uploaded file. Its extension selects the parser:
+            ``.csv`` uses ``pd.read_csv``; any ``.xls*`` extension uses
+            ``pd.read_excel``.
+
+    Returns:
+        The parsed DataFrame, or ``None`` when the input is missing or
+        malformed, the extension is unsupported, or parsing fails.
+    """
+    if not contents or not filename:
+        return None
+    # Split on the first comma only, and treat a payload without a comma as
+    # malformed instead of letting tuple unpacking raise.
+    _, separator, content_string = contents.partition(',')
+    if not separator:
+        return None
+    try:
+        # Match on the real extension, case-insensitively; a substring test
+        # would send a name such as "csv_export.xlsx" to the CSV parser.
+        extension = os.path.splitext(filename)[1].lower()
+        decoded = base64.b64decode(content_string)
+        if extension == '.csv':
+            return pd.read_csv(io.StringIO(decoded.decode('utf-8')))
+        if extension.startswith('.xls'):
+            return pd.read_excel(io.BytesIO(decoded))
+        return None
+    except Exception as e:
+        # Best-effort parser: report the problem and let the caller handle None.
+        print(e)
+        return None
+def process_data(df, instructions):
     try:
         # Initialize Gemini
         api_key = os.environ.get('GEMINI_API_KEY')
         genai.configure(api_key=api_key)
         model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25')
         # Generate visualization code
         response = model.generate_content(f"""
             Analyze the following dataset and instructions:
         elif '```' in code_block:
             code_block = code_block.split('```')[1].strip()
         plots = ast.literal_eval(code_block)
+        return plots
     except Exception as e:
+        print(f"Error in process_data: {str(e)}")
+        return None
+def generate_plot(df, plot_info):
+    """Build a Plotly figure from one plot specification.
+
+    Args:
+        df: Source DataFrame; it is copied, never modified.
+        plot_info: Mapping describing the plot. Reads ``x``, ``y`` and
+            ``plot_type`` (``bar``, ``line``, ``scatter``, ``hist``, ``pie``
+            or ``heatmap``); optionally ``title``, ``agg_func`` (``sum``,
+            ``mean`` or ``count``), ``top_n`` and, for heatmaps,
+            ``additional['color']`` naming the column to pivot on.
+
+    Returns:
+        A ``plotly.graph_objs.Figure`` titled and labelled from ``plot_info``.
+
+    Raises:
+        KeyError: If a required key or a referenced column is missing.
+        ValueError: If ``plot_type`` is not one of the supported kinds.
+    """
+    x_col = plot_info['x']
+    y_col = plot_info['y']
+    plot_type = plot_info.get('plot_type')
+    if plot_type not in ('bar', 'line', 'scatter', 'hist', 'pie', 'heatmap'):
+        # Fail with a clear message; otherwise `fig` would never be assigned
+        # and the layout call below would raise UnboundLocalError.
+        raise ValueError(f"Unsupported plot_type: {plot_type!r}")
+
+    plot_df = df.copy()
+
+    # Optional aggregation step; any other value leaves the data as-is.
+    agg_func = plot_info.get('agg_func')
+    if agg_func == 'sum':
+        plot_df = plot_df.groupby(x_col)[y_col].sum().reset_index()
+    elif agg_func == 'mean':
+        plot_df = plot_df.groupby(x_col)[y_col].mean().reset_index()
+    elif agg_func == 'count':
+        plot_df = plot_df.groupby(x_col).size().reset_index(name=y_col)
+
+    # Optionally keep only the rows with the largest y values.
+    if plot_info.get('top_n'):
+        plot_df = plot_df.nlargest(plot_info['top_n'], y_col)
+
+    if plot_type == 'bar':
+        fig = go.Figure(go.Bar(x=plot_df[x_col], y=plot_df[y_col]))
+    elif plot_type == 'line':
+        fig = go.Figure(go.Scatter(x=plot_df[x_col], y=plot_df[y_col], mode='lines'))
+    elif plot_type == 'scatter':
+        fig = go.Figure(go.Scatter(x=plot_df[x_col], y=plot_df[y_col], mode='markers'))
+    elif plot_type == 'hist':
+        fig = go.Figure(go.Histogram(x=plot_df[x_col]))
+    elif plot_type == 'pie':
+        fig = go.Figure(go.Pie(labels=plot_df[x_col], values=plot_df[y_col]))
+    else:  # heatmap
+        pivot_df = plot_df.pivot(index=x_col, columns=plot_info['additional']['color'], values=y_col)
+        fig = go.Figure(go.Heatmap(z=pivot_df.values, x=pivot_df.columns, y=pivot_df.index))
+
+    fig.update_layout(title=plot_info.get('title'), xaxis_title=x_col, yaxis_title=y_col)
+    return fig
+@app.callback(
+    [Output('visualization-1', 'figure'),
+     Output('visualization-2', 'figure'),
+     Output('visualization-3', 'figure')],
+    [Input('submit-button', 'n_clicks')],
+    [State('upload-data', 'contents'),
+     State('upload-data', 'filename'),
+     State('instructions', 'value')]
+)
+def update_output(n_clicks, contents, filename, instructions):
+    """Regenerate the three graphs when the submit button is clicked.
+
+    Args:
+        n_clicks: Click count of the submit button (``None`` before any click).
+        contents: Upload payload from the ``upload-data`` component.
+        filename: Name of the uploaded file.
+        instructions: Free-text analysis instructions entered by the user.
+
+    Returns:
+        Three values, one per graph. All three are ``dash.no_update`` when
+        there is nothing to draw (no click, no upload, unparseable file, or
+        no usable plot specifications). Otherwise a list of three figures,
+        where a slot with no specification or a failing one is an empty figure.
+    """
+    unchanged = (dash.no_update,) * 3
+    if n_clicks is None or contents is None:
+        return unchanged
+    df = parse_contents(contents, filename)
+    if df is None:
+        return unchanged
+    plots = process_data(df, instructions)
+    # Guard against a non-list result, which could not be sliced below.
+    if not isinstance(plots, (list, tuple)) or not plots:
+        return unchanged
+
+    figures = []
+    for plot_info in plots[:3]:
+        try:
+            figures.append(generate_plot(df, plot_info))
+        except Exception as e:
+            # One bad specification must not discard the other figures.
+            print(f"Error generating plot: {e}")
+            figures.append(go.Figure())
+    # Fewer than three specifications: fill the remaining slots with blanks
+    # rather than throwing away the figures that were produced.
+    figures.extend(go.Figure() for _ in range(3 - len(figures)))
+    return figures
+if __name__ == '__main__':
+    # The server binds to all interfaces, so debug mode (which enables the
+    # interactive in-browser debugger) must not be on by default. Opt in
+    # locally by setting DASH_DEBUG=1.
+    debug_enabled = os.environ.get('DASH_DEBUG', '').lower() in ('1', 'true', 'yes')
+    app.run(debug=debug_enabled, host='0.0.0.0', port=7860, threaded=True)