Anupam202224 committed on
Commit
852ebe2
·
verified ·
1 Parent(s): 5058119

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -22
app.py CHANGED
@@ -7,12 +7,32 @@ import torch
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
 
 
 
 
10
  # Define constants
11
  MODEL_NAME = "gpt2" # Publicly accessible model suitable for CPU
12
  FIGURES_DIR = "./figures"
 
 
13
 
14
- # Ensure the figures directory exists
15
  os.makedirs(FIGURES_DIR, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Initialize tokenizer and model
18
  print("Loading model and tokenizer...")
@@ -86,7 +106,7 @@ def analyze_data(data_file_path):
86
  try:
87
  data = pd.read_csv(data_file_path)
88
  except Exception as e:
89
- return None, f"Error loading CSV file: {e}"
90
 
91
  # Generate data description
92
  data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
@@ -115,9 +135,10 @@ def analyze_data(data_file_path):
115
  plt.figure(figsize=(8, 6))
116
  sns.countplot(x=target, data=data)
117
  plt.title(f"Distribution of {target}")
118
- plt.savefig(os.path.join(FIGURES_DIR, f"{target}_distribution.png"))
 
119
  plt.clf()
120
- visualization_paths.append(os.path.join(FIGURES_DIR, f"{target}_distribution.png"))
121
 
122
  # Pairplot (limited to first 5 numeric columns for performance)
123
  numeric_cols = data.select_dtypes(include='number').columns[:5]
@@ -138,14 +159,14 @@ def interact_with_agent(file_input, additional_notes):
138
  os.makedirs(FIGURES_DIR, exist_ok=True)
139
 
140
  if file_input is None:
141
- yield [("Error", "No file uploaded.")]
142
  return
143
 
144
  # Analyze the data
145
  data_description, visualization_paths, target = analyze_data(file_input.name)
146
 
147
  if data_description is None:
148
- yield [("Error", visualization_paths)] # visualization_paths contains the error message
149
  return
150
 
151
  # Construct the prompt for the model
@@ -157,18 +178,22 @@ def interact_with_agent(file_input, additional_notes):
157
  # Generate summary from the model
158
  summary = generate_summary(prompt)
159
 
160
- # Prepare chat messages
161
  messages = [
162
- ("User", "I have uploaded a CSV file for analysis."),
163
- ("Assistant", "⏳ _Analyzing the data..._")
164
  ]
165
 
166
  # Append the summary
167
- messages.append(("Assistant", summary))
168
 
169
  # Append images
170
  for image_path in visualization_paths:
171
- messages.append(("Assistant", gr.Image.update(value=image_path)))
 
 
 
 
172
 
173
  yield messages
174
 
@@ -181,10 +206,10 @@ with gr.Blocks(
181
  ) as demo:
182
  gr.Markdown("""# 📊 Data Analyst Assistant
183
 
184
- Upload a `.csv` file, add any additional notes, and **the assistant will analyze the data and generate visualizations and insights for you!**
185
 
186
- **Example:** [Titanic Dataset](./example/titanic.csv)
187
- """)
188
 
189
  with gr.Row():
190
  file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
@@ -194,14 +219,18 @@ with gr.Blocks(
194
  )
195
 
196
  submit = gr.Button("Run Analysis", variant="primary")
197
- chatbot = gr.Chatbot(label="Data Analyst Agent")
198
-
199
- gr.Examples(
200
- examples=[["./example/titanic.csv", example_notes]],
201
- inputs=[file_input, text_input],
202
- label="Examples",
203
- cache_examples=False
204
- )
 
 
 
 
205
 
206
  # Connect the submit button to the interact_with_agent function
207
  submit.click(
 
7
  import matplotlib.pyplot as plt
8
  import seaborn as sns
9
 
10
+ # Optional: Uncomment the following lines if you plan to use a gated model in the future
11
+ # from huggingface_hub import login
12
+
13
  # Define constants
14
  MODEL_NAME = "gpt2" # Publicly accessible model suitable for CPU
15
  FIGURES_DIR = "./figures"
16
+ EXAMPLE_DIR = "./example"
17
+ EXAMPLE_FILE = os.path.join(EXAMPLE_DIR, "titanic.csv")
18
 
19
+ # Ensure the figures and example directories exist
20
  os.makedirs(FIGURES_DIR, exist_ok=True)
21
+ os.makedirs(EXAMPLE_DIR, exist_ok=True)
22
+
23
+ # Download the Titanic dataset if it doesn't exist
24
+ if not os.path.isfile(EXAMPLE_FILE):
25
+ print("Downloading the Titanic dataset for examples...")
26
+ try:
27
+ # Fetch the Titanic dataset with seaborn; load_dataset downloads it from seaborn's online data repository (cached after the first call)
28
+ titanic = sns.load_dataset('titanic')
29
+ titanic.to_csv(EXAMPLE_FILE, index=False)
30
+ print(f"Example dataset saved to {EXAMPLE_FILE}.")
31
+ except Exception as e:
32
+ print(f"Failed to download the Titanic dataset: {e}")
33
+ print("Please ensure the 'example/titanic.csv' file exists.")
34
+ # Optionally, exit or continue without examples
35
+ # exit(1)
36
 
37
  # Initialize tokenizer and model
38
  print("Loading model and tokenizer...")
 
106
  try:
107
  data = pd.read_csv(data_file_path)
108
  except Exception as e:
109
+ return None, f"Error loading CSV file: {e}", None
110
 
111
  # Generate data description
112
  data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
 
135
  plt.figure(figsize=(8, 6))
136
  sns.countplot(x=target, data=data)
137
  plt.title(f"Distribution of {target}")
138
+ distribution_path = os.path.join(FIGURES_DIR, f"{target}_distribution.png")
139
+ plt.savefig(distribution_path)
140
  plt.clf()
141
+ visualization_paths.append(distribution_path)
142
 
143
  # Pairplot (limited to first 5 numeric columns for performance)
144
  numeric_cols = data.select_dtypes(include='number').columns[:5]
 
159
  os.makedirs(FIGURES_DIR, exist_ok=True)
160
 
161
  if file_input is None:
162
+ yield [{"role": "assistant", "content": "❌ No file uploaded. Please upload a CSV file to proceed."}]
163
  return
164
 
165
  # Analyze the data
166
  data_description, visualization_paths, target = analyze_data(file_input.name)
167
 
168
  if data_description is None:
169
+ yield [{"role": "assistant", "content": data_description}] # NOTE: data_description is None on this branch; analyze_data returns the error message as its second value (visualization_paths)
170
  return
171
 
172
  # Construct the prompt for the model
 
178
  # Generate summary from the model
179
  summary = generate_summary(prompt)
180
 
181
+ # Prepare chat messages in 'messages' format
182
  messages = [
183
+ {"role": "user", "content": "I have uploaded a CSV file for analysis."},
184
+ {"role": "assistant", "content": "⏳ _Analyzing the data..._"}
185
  ]
186
 
187
  # Append the summary
188
+ messages.append({"role": "assistant", "content": summary})
189
 
190
  # Append images
191
  for image_path in visualization_paths:
192
+ # Ensure the image path is valid before attempting to display
193
+ if os.path.isfile(image_path):
194
+ messages.append({"role": "assistant", "content": f"![{os.path.basename(image_path)}]({image_path})"})
195
+ else:
196
+ messages.append({"role": "assistant", "content": f"⚠️ Unable to find image: {image_path}"})
197
 
198
  yield messages
199
 
 
206
  ) as demo:
207
  gr.Markdown("""# 📊 Data Analyst Assistant
208
 
209
+ Upload a `.csv` file, add any additional notes, and **the assistant will analyze the data and generate visualizations and insights for you!**
210
 
211
+ **Example:** [Titanic Dataset](./example/titanic.csv)
212
+ """)
213
 
214
  with gr.Row():
215
  file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
 
219
  )
220
 
221
  submit = gr.Button("Run Analysis", variant="primary")
222
+ chatbot = gr.Chatbot(label="Data Analyst Agent", type='messages', height=500)
223
+
224
+ # Handle examples only if the example file exists
225
+ if os.path.isfile(EXAMPLE_FILE):
226
+ gr.Examples(
227
+ examples=[[EXAMPLE_FILE, example_notes]],
228
+ inputs=[file_input, text_input],
229
+ label="Examples",
230
+ cache_examples=False
231
+ )
232
+ else:
233
+ gr.Markdown("**No example files available.** Please upload your own CSV files.")
234
 
235
  # Connect the submit button to the interact_with_agent function
236
  submit.click(