Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Files Community

CosmickVisions committed on Mar 24

Commit

925e1b1

verified ·

1 Parent(s): eeb6964

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -73

app.py CHANGED Viewed

@@ -234,6 +234,7 @@ def update_vector_store_with_plot(plot_text, existing_vector_store):
     return existing_vector_store
 def extract_plot_data(plot_info, df):
     plot_type = plot_info["type"]
     x_col = plot_info["x"]
     y_col = plot_info["y"] if "y" in plot_info else None
@@ -271,6 +272,16 @@ def extract_plot_data(plot_info, df):
                     plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
     return plot_text
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
@@ -368,6 +379,34 @@ def display_dataset_preview():
         st.dataframe(st.session_state.cleaned_data.head(10), use_container_width=True)
         st.markdown("---")
 # Main App
 def main():
     # Header
@@ -562,84 +601,89 @@ def main():
                 new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
                 update_cleaned_data(new_df)
-    elif app_mode == "EDA":
-        st.header("🔍 Interactive Data Explorer")
         if 'cleaned_data' not in st.session_state:
             st.warning("Please upload and clean data first.")
             st.stop()
         df = st.session_state.cleaned_data.copy()
-        enhance_section_title("Dataset Overview")
-        with st.container():
-            col1, col2, col3, col4 = st.columns(4)
-            col1.metric("Total Rows", df.shape[0])
-            col2.metric("Total Columns", df.shape[1])
-            missing_percentage = df.isna().sum().sum() / df.size * 100
-            col3.metric("Missing Values", f"{df.isna().sum().sum()} ({missing_percentage:.1f}%)")
-            col4.metric("Duplicates", df.duplicated().sum())
-        tab1, tab2, tab3 = st.tabs(["Quick Preview", "Column Types", "Missing Matrix"])
-        with tab1:
-            st.write("First few rows of the dataset:")
-            st.dataframe(df.head(), use_container_width=True)
-        with tab2:
-            st.write("Column Data Types:")
-            type_counts = df.dtypes.value_counts().reset_index()
-            type_counts.columns = ['Type', 'Count']
-            st.dataframe(type_counts, use_container_width=True)
-        with tab3:
-            st.write("Missing Values Matrix:")
-            fig_missing = px.imshow(df.isna(), color_continuous_scale=['#e0e0e0', '#66c2a5'])
-            fig_missing.update_layout(coloraxis_colorscale=[[0, 'lightgrey'], [1, '#FF4B4B']])
-            st.plotly_chart(fig_missing, use_container_width=True)
-        enhance_section_title("Interactive Visualization Builder")
-        with st.container():
-            col1, col2 = st.columns([1, 3])
-            with col1:
-                plot_type = st.selectbox("Choose visualization type", [
-                    "Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix"
-                ])
-                x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
-                y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart"] else None
-                color_by = st.selectbox("Color encoding", ["None"] + df.columns.tolist(), format_func=lambda x: "No color" if x == "None" else x) if plot_type != "Correlation Matrix" else None
-            with col2:
-                try:
-                    fig = None
-                    if plot_type == "Scatter Plot" and x_axis and y_axis:
-                        fig = px.scatter(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Scatter Plot of {x_axis} vs {y_axis}')
-                    elif plot_type == "Histogram" and x_axis:
-                        fig = px.histogram(df, x=x_axis, color=color_by if color_by != "None" else None, nbins=30, title=f'Histogram of {x_axis}')
-                    elif plot_type == "Box Plot" and x_axis and y_axis:
-                        fig = px.box(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Box Plot of {x_axis} vs {y_axis}')
-                    elif plot_type == "Line Chart" and x_axis and y_axis:
-                        fig = px.line(df, x=x_axis, y=y_axis, color=color_by if color_by != "None" else None, title=f'Line Chart of {x_axis} vs {y_axis}')
-                    elif plot_type == "Bar Chart" and x_axis:
-                        fig = px.bar(df, x=x_axis, color=color_by if color_by != "None" else None, title=f'Bar Chart of {x_axis}')
-                    elif plot_type == "Correlation Matrix":
-                        numeric_df = df.select_dtypes(include=np.number)
-                        if len(numeric_df.columns) > 1:
-                            corr = numeric_df.corr()
-                            fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
-                    if fig:
-                        fig.update_layout(template="plotly_white")
-                        st.plotly_chart(fig, use_container_width=True)
-                        st.session_state.last_plot = {
-                            "type": plot_type,
-                            "x": x_axis,
-                            "y": y_axis,
-                            "data": df[[x_axis, y_axis]].to_json() if y_axis else df[[x_axis]].to_json()
-                        }
-                        plot_text = extract_plot_data(st.session_state.last_plot, df)
-                        st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
-                        with st.expander("Extracted Plot Data"):
-                            st.text(plot_text)
-                    else:
-                        st.error("Please provide required inputs for the selected plot type.")
-                except Exception as e:
-                    st.error(f"Couldn't create visualization: {str(e)}")
     # Chatbot Section
     st.markdown("---")

     return existing_vector_store
 def extract_plot_data(plot_info, df):
+    # Updated to handle Plotly.js JSON
     plot_type = plot_info["type"]
     x_col = plot_info["x"]
     y_col = plot_info["y"] if "y" in plot_info else None
                     plot_text += f"{col1} vs {col2}: {corr.loc[col2, col1]:.2f}\n"
     return plot_text
+def generate_3d_scatter_plot(params):
+    """Build a 3D scatter plot from an "X vs Y vs Z" column specification.
+
+    Args:
+        params: Text naming three columns of
+            ``st.session_state.cleaned_data`` separated by the word "vs",
+            e.g. ``"height vs weight vs age"``.
+
+    Returns:
+        The Plotly figure serialized with ``fig.to_json()``, or ``None`` when
+        ``params`` is empty or does not name exactly three existing columns.
+    """
+    if not params:
+        return None
+    df = st.session_state.cleaned_data
+    # Split on the separator rather than matching `[\w\s]+` around it, so
+    # column names containing punctuation (e.g. "col-1", "price ($)") are
+    # kept whole instead of being truncated or rejected.
+    axes = [part.strip() for part in re.split(r"\s+vs\s+", params.strip())]
+    if len(axes) != 3 or not all(axis in df.columns for axis in axes):
+        return None
+    x_axis, y_axis, z_axis = axes
+    fig = px.scatter_3d(
+        df,
+        x=x_axis,
+        y=y_axis,
+        z=z_axis,
+        title=f'3D Scatter Plot of {x_axis} vs {y_axis} vs {z_axis}',
+    )
+    return fig.to_json()
 def get_chatbot_response(user_input, app_mode, vector_store=None, model="llama3-70b-8192"):
     system_prompt = (
         "You are an AI assistant in Data-Vision Pro, a data analysis app with RAG capabilities. "
         st.dataframe(st.session_state.cleaned_data.head(10), use_container_width=True)
         st.markdown("---")
+def suggest_data_cleaning(df):
+    """Return newline-joined cleaning suggestions for columns with NaNs.
+
+    A column whose missing share exceeds one half of the rows gets a "drop"
+    suggestion; any other column with missing values gets an "impute"
+    suggestion. When nothing is missing, a fixed fallback message is
+    returned instead.
+    """
+    fallback = "No automatic cleaning suggestions."
+    if not df.isna().values.any():
+        return fallback
+
+    row_count = df.shape[0]
+    advice = []
+    for column in df.columns:
+        missing = df[column].isna().sum()
+        if not missing > 0:
+            continue
+        if missing / row_count > 0.5:
+            advice.append(f"- Drop column '{column}' (>50% missing values)")
+            continue
+        advice.append(
+            f"- Impute missing values in column '{column}' ({missing} missing values)"
+        )
+    return "\n".join(advice) if advice else fallback
+def parse_command(command):
+    # NOTE(review): the two "..." placeholder comments stand in for branches
+    # that are not shown here. As written, the `elif` below has no preceding
+    # `if`, so this text does not parse on its own — confirm the complete
+    # function is what actually lands in app.py.
+    # ... (Previous command parser) ...
+    # 3D scatter requests: strip the trigger phrase and hand the remaining
+    # text to generate_3d_scatter_plot as its params. A command matching
+    # "show a 3d scatter plot" without the word "of" is passed through
+    # unchanged, because neither replace() pattern matches it.
+    elif "show a 3d scatter plot" in command or "3d scatter plot of" in command:
+        params = command.replace("show a 3d scatter plot of", "").replace("3d scatter plot of", "").strip()
+        return generate_3d_scatter_plot, params
+    # ... (rest of the function is same)
+def parse_multistep_command(command):
+    """Parse a ';'-separated command string into a list of steps.
+
+    Each segment is stripped and passed to ``parse_command``; segments whose
+    returned handler is falsy are dropped.
+
+    Returns:
+        A list of ``(func, param)`` tuples in the order the steps appear.
+    """
+    resolved = (parse_command(segment.strip()) for segment in command.split(';'))
+    return [(handler, argument) for handler, argument in resolved if handler]
 # Main App
 def main():
     # Header
                 new_df[scale_cols] = scaler.fit_transform(new_df[scale_cols])
                 update_cleaned_data(new_df)
+      elif app_mode == "EDA":
+        st.header("🔍 Exploratory Data Analysis (EDA)")
         if 'cleaned_data' not in st.session_state:
             st.warning("Please upload and clean data first.")
             st.stop()
         df = st.session_state.cleaned_data.copy()
+        st.markdown("### Dataset Overview")
+        col1, col2, col3 = st.columns(3)
+        col1.metric("Rows", df.shape[0])
+        col2.metric("Columns", df.shape[1])
+        col3.metric("Missing Values", df.isna().sum().sum())
+        # Interactive Visualization Builder with Plotly.js
+        st.markdown("### Interactive Visualization Builder")
+        plot_type = st.selectbox("Choose visualization type", [
+            "Scatter Plot", "Histogram", "Box Plot", "Line Chart", "Bar Chart", "Correlation Matrix", "3D Scatter Plot"
+        ])
+        x_axis = st.selectbox("X-axis", df.columns) if plot_type != "Correlation Matrix" else None
+        y_axis = st.selectbox("Y-axis", df.columns) if plot_type in ["Scatter Plot", "Box Plot", "Line Chart", "3D Scatter Plot"] else None
+        z_axis = st.selectbox("Z-axis", df.columns) if plot_type == "3D Scatter Plot" else None
+        generate_plot = st.button("Generate Plot")
+        if generate_plot:
+            fig_json = None
+            try:
+                if plot_type == "Scatter Plot":
+                    fig = px.scatter(df, x=x_axis, y=y_axis, title=f'Scatter Plot of {x_axis} vs {y_axis}')
+                    fig_json = fig.to_json()
+                elif plot_type == "Histogram":
+                    fig = px.histogram(df, x=x_axis, title=f'Histogram of {x_axis}')
+                    fig_json = fig.to_json()
+                elif plot_type == "Box Plot":
+                    fig = px.box(df, x=x_axis, y=y_axis, title=f'Box Plot of {x_axis} vs {y_axis}')
+                    fig_json = fig.to_json()
+                elif plot_type == "Line Chart":
+                    fig = px.line(df, x=x_axis, y=y_axis, title=f'Line Chart of {x_axis} vs {y_axis}')
+                    fig_json = fig.to_json()
+                elif plot_type == "Bar Chart":
+                    fig = px.bar(df, x=x_axis, title=f'Bar Chart of {x_axis}')
+                    fig_json = fig.to_json()
+                elif plot_type == "Correlation Matrix":
+                    numeric_df = df.select_dtypes(include=np.number)
+                    if len(numeric_df.columns) > 1:
+                        corr = numeric_df.corr()
+                        fig = px.imshow(corr, text_auto=True, color_continuous_scale='RdBu_r', zmin=-1, zmax=1, title='Correlation Matrix')
+                        fig_json = fig.to_json()
+                elif plot_type == "3D Scatter Plot":
+                    fig_json = generate_3d_scatter_plot(f"{x_axis} vs {y_axis} vs {z_axis}")
+                if fig_json:
+                    # Render Plotly.js Chart
+                    st.components.v1.html(f"""
+                        <div id="plotly-chart"></div>
+                        <script>
+                            Plotly.newPlot('plotly-chart', {fig_json});
+                        </script>
+                    """, height=600)
+                    # Store Plotly JSON in session state
+                    st.session_state.last_plot = {
+                        "type": plot_type,
+                        "x": x_axis,
+                        "y": y_axis,
+                        "z": z_axis if plot_type == "3D Scatter Plot" else None,
+                        "data": fig_json
+                    }
+                    # Extract and display plot data
+                    plot_text = extract_plot_data(st.session_state.last_plot, df)
+                    st.session_state.vector_store = update_vector_store_with_plot(plot_text, st.session_state.vector_store)
+                    with st.expander("Extracted Plot Data"):
+                        st.text(plot_text)
+            except Exception as e:
+                st.error(f"Couldn't generate plot: {str(e)}")
     # Chatbot Section
     st.markdown("---")