Spaces:

MicroHealth
/

autodata-visualizer

Paused

App Files Files Community

bluenevus committed on Apr 12

Commit

a5f2a3b

verified ·

1 Parent(s): 72c5969

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -16

app.py CHANGED Viewed

@@ -19,11 +19,24 @@ def process_file(file, instructions, api_key):
         # Generate visualization code
         response = model.generate_content(f"""
-            Create 3 matplotlib visualization codes based on: {instructions}
             Data columns: {list(df.columns)}
-            Return Python code as: [('title','plot_type','x','y'), ...]
-            Allowed plot_types: bar, line, scatter, hist
-            Use only DataFrame 'df' and these exact variable names.
         """)
         # Extract code block safely
@@ -42,20 +55,23 @@ def process_file(file, instructions, api_key):
         images = []
         for plot in plots[:3]:  # Ensure max 3 plots
             fig, ax = plt.subplots(figsize=(10, 6))
-            title, plot_type, x, y = plot
-            if plot_type == 'bar':
-                df.plot(kind='bar', x=x, y=y, ax=ax)
-            elif plot_type == 'line':
-                df.plot(kind='line', x=x, y=y, ax=ax)
-            elif plot_type == 'scatter':
-                df.plot(kind='scatter', x=x, y=y, ax=ax)
-            elif plot_type == 'hist':
-                df[x].hist(ax=ax)
-            ax.set_title(title)
-            ax.set_xlabel(x)
-            ax.set_ylabel(y if y else 'Frequency')
             plt.tight_layout()
             buf = io.BytesIO()

         # Generate visualization code
         response = model.generate_content(f"""
+            Analyze the following dataset and instructions:
             Data columns: {list(df.columns)}
+            Instructions: {instructions}
+            Based on this, create 3 appropriate visualizations. For each visualization, provide:
+            1. A title
+            2. The most suitable plot type (choose from: bar, line, scatter, hist)
+            3. The column to use for the x-axis
+            4. The column(s) to use for the y-axis (can be a list for multiple columns, or None for histograms)
+            5. Any necessary data preprocessing steps (e.g., grouping, sorting, etc.)
+            Return your response as a Python list of dictionaries:
+            [
+                {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "preprocessing": "..."}},
+                {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "preprocessing": "..."}},
+                {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "preprocessing": "..."}}
+            ]
         """)
         # Extract code block safely
         images = []
         for plot in plots[:3]:  # Ensure max 3 plots
             fig, ax = plt.subplots(figsize=(10, 6))
+            # Apply preprocessing if any
+            if plot['preprocessing']:
+                exec(plot['preprocessing'])
+            if plot['plot_type'] == 'bar':
+                df.plot(kind='bar', x=plot['x'], y=plot['y'], ax=ax)
+            elif plot['plot_type'] == 'line':
+                df.plot(kind='line', x=plot['x'], y=plot['y'], ax=ax)
+            elif plot['plot_type'] == 'scatter':
+                df.plot(kind='scatter', x=plot['x'], y=plot['y'], ax=ax)
+            elif plot['plot_type'] == 'hist':
+                df[plot['x']].hist(ax=ax)
+            ax.set_title(plot['title'])
+            ax.set_xlabel(plot['x'])
+            ax.set_ylabel(plot['y'] if plot['y'] else 'Frequency')
             plt.tight_layout()
             buf = io.BytesIO()