Spaces:

Anupam202224
/

DataAnalysis-A

Sleeping

App Files Files Community

Anupam202224 commited on Oct 11, 2024

Commit

852ebe2

verified ·

1 Parent(s): 5058119

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -22

app.py CHANGED Viewed

@@ -7,12 +7,32 @@ import torch
 import matplotlib.pyplot as plt
 import seaborn as sns
 # Define constants
 MODEL_NAME = "gpt2"  # Publicly accessible model suitable for CPU
 FIGURES_DIR = "./figures"
-# Ensure the figures directory exists
 os.makedirs(FIGURES_DIR, exist_ok=True)
 # Initialize tokenizer and model
 print("Loading model and tokenizer...")
@@ -86,7 +106,7 @@ def analyze_data(data_file_path):
     try:
         data = pd.read_csv(data_file_path)
     except Exception as e:
-        return None, f"Error loading CSV file: {e}"
     # Generate data description
     data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
@@ -115,9 +135,10 @@ def analyze_data(data_file_path):
     plt.figure(figsize=(8, 6))
     sns.countplot(x=target, data=data)
     plt.title(f"Distribution of {target}")
-    plt.savefig(os.path.join(FIGURES_DIR, f"{target}_distribution.png"))
     plt.clf()
-    visualization_paths.append(os.path.join(FIGURES_DIR, f"{target}_distribution.png"))
     # Pairplot (limited to first 5 numeric columns for performance)
     numeric_cols = data.select_dtypes(include='number').columns[:5]
@@ -138,14 +159,14 @@ def interact_with_agent(file_input, additional_notes):
     os.makedirs(FIGURES_DIR, exist_ok=True)
     if file_input is None:
-        yield [("Error", "No file uploaded.")]
         return
     # Analyze the data
     data_description, visualization_paths, target = analyze_data(file_input.name)
     if data_description is None:
-        yield [("Error", visualization_paths)]  # visualization_paths contains the error message
         return
     # Construct the prompt for the model
@@ -157,18 +178,22 @@ def interact_with_agent(file_input, additional_notes):
     # Generate summary from the model
     summary = generate_summary(prompt)
-    # Prepare chat messages
     messages = [
-        ("User", "I have uploaded a CSV file for analysis."),
-        ("Assistant", "⏳ _Analyzing the data..._")
     ]
     # Append the summary
-    messages.append(("Assistant", summary))
     # Append images
     for image_path in visualization_paths:
-        messages.append(("Assistant", gr.Image.update(value=image_path)))
     yield messages
@@ -181,10 +206,10 @@ with gr.Blocks(
 ) as demo:
     gr.Markdown("""# 📊 Data Analyst Assistant
-    Upload a `.csv` file, add any additional notes, and **the assistant will analyze the data and generate visualizations and insights for you!**
-    **Example:** [Titanic Dataset](./example/titanic.csv)
-    """)
     with gr.Row():
         file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
@@ -194,14 +219,18 @@ with gr.Blocks(
         )
     submit = gr.Button("Run Analysis", variant="primary")
-    chatbot = gr.Chatbot(label="Data Analyst Agent")
-    gr.Examples(
-        examples=[["./example/titanic.csv", example_notes]],
-        inputs=[file_input, text_input],
-        label="Examples",
-        cache_examples=False
-    )
     # Connect the submit button to the interact_with_agent function
     submit.click(

 import matplotlib.pyplot as plt
 import seaborn as sns
+# Optional: Uncomment the following lines if you plan to use a gated model in the future
+# from huggingface_hub import login
 # Define constants
 MODEL_NAME = "gpt2"  # Publicly accessible model suitable for CPU
 FIGURES_DIR = "./figures"
+EXAMPLE_DIR = "./example"
+EXAMPLE_FILE = os.path.join(EXAMPLE_DIR, "titanic.csv")
+# Ensure the figures and example directories exist
 os.makedirs(FIGURES_DIR, exist_ok=True)
+os.makedirs(EXAMPLE_DIR, exist_ok=True)
+# Download the Titanic dataset if it doesn't exist
+if not os.path.isfile(EXAMPLE_FILE):
+    print("Downloading the Titanic dataset for examples...")
+    try:
+        # Using seaborn's built-in Titanic dataset
+        titanic = sns.load_dataset('titanic')
+        titanic.to_csv(EXAMPLE_FILE, index=False)
+        print(f"Example dataset saved to {EXAMPLE_FILE}.")
+    except Exception as e:
+        print(f"Failed to download the Titanic dataset: {e}")
+        print("Please ensure the 'example/titanic.csv' file exists.")
+        # Optionally, exit or continue without examples
+        # exit(1)
 # Initialize tokenizer and model
 print("Loading model and tokenizer...")
     try:
         data = pd.read_csv(data_file_path)
     except Exception as e:
+        return None, f"Error loading CSV file: {e}", None
     # Generate data description
     data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
     plt.figure(figsize=(8, 6))
     sns.countplot(x=target, data=data)
     plt.title(f"Distribution of {target}")
+    distribution_path = os.path.join(FIGURES_DIR, f"{target}_distribution.png")
+    plt.savefig(distribution_path)
     plt.clf()
+    visualization_paths.append(distribution_path)
     # Pairplot (limited to first 5 numeric columns for performance)
     numeric_cols = data.select_dtypes(include='number').columns[:5]
     os.makedirs(FIGURES_DIR, exist_ok=True)
     if file_input is None:
+        yield [{"role": "assistant", "content": "❌ No file uploaded. Please upload a CSV file to proceed."}]
         return
     # Analyze the data
     data_description, visualization_paths, target = analyze_data(file_input.name)
     if data_description is None:
+        yield [{"role": "assistant", "content": data_description}]  # data_description contains the error message
         return
     # Construct the prompt for the model
     # Generate summary from the model
     summary = generate_summary(prompt)
+    # Prepare chat messages in 'messages' format
     messages = [
+        {"role": "user", "content": "I have uploaded a CSV file for analysis."},
+        {"role": "assistant", "content": "⏳ _Analyzing the data..._"}
     ]
     # Append the summary
+    messages.append({"role": "assistant", "content": summary})
     # Append images
     for image_path in visualization_paths:
+        # Ensure the image path is valid before attempting to display
+        if os.path.isfile(image_path):
+            messages.append({"role": "assistant", "content": f"![{os.path.basename(image_path)}]({image_path})"})
+        else:
+            messages.append({"role": "assistant", "content": f"⚠️ Unable to find image: {image_path}"})
     yield messages
 ) as demo:
     gr.Markdown("""# 📊 Data Analyst Assistant
+Upload a `.csv` file, add any additional notes, and **the assistant will analyze the data and generate visualizations and insights for you!**
+**Example:** [Titanic Dataset](./example/titanic.csv)
+""")
     with gr.Row():
         file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
         )
     submit = gr.Button("Run Analysis", variant="primary")
+    chatbot = gr.Chatbot(label="Data Analyst Agent", type='messages', height=500)
+    # Handle examples only if the example file exists
+    if os.path.isfile(EXAMPLE_FILE):
+        gr.Examples(
+            examples=[[EXAMPLE_FILE, example_notes]],
+            inputs=[file_input, text_input],
+            label="Examples",
+            cache_examples=False
+        )
+    else:
+        gr.Markdown("**No example files available.** Please upload your own CSV files.")
     # Connect the submit button to the interact_with_agent function
     submit.click(