Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -7,12 +7,32 @@ import torch
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import seaborn as sns
|
9 |
|
|
|
|
|
|
|
10 |
# Define constants
|
11 |
MODEL_NAME = "gpt2" # Publicly accessible model suitable for CPU
|
12 |
FIGURES_DIR = "./figures"
|
|
|
|
|
13 |
|
14 |
-
# Ensure the figures directory exists
|
15 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Initialize tokenizer and model
|
18 |
print("Loading model and tokenizer...")
|
@@ -86,7 +106,7 @@ def analyze_data(data_file_path):
|
|
86 |
try:
|
87 |
data = pd.read_csv(data_file_path)
|
88 |
except Exception as e:
|
89 |
-
return None, f"Error loading CSV file: {e}"
|
90 |
|
91 |
# Generate data description
|
92 |
data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
|
@@ -115,9 +135,10 @@ def analyze_data(data_file_path):
|
|
115 |
plt.figure(figsize=(8, 6))
|
116 |
sns.countplot(x=target, data=data)
|
117 |
plt.title(f"Distribution of {target}")
|
118 |
-
|
|
|
119 |
plt.clf()
|
120 |
-
visualization_paths.append(
|
121 |
|
122 |
# Pairplot (limited to first 5 numeric columns for performance)
|
123 |
numeric_cols = data.select_dtypes(include='number').columns[:5]
|
@@ -138,14 +159,14 @@ def interact_with_agent(file_input, additional_notes):
|
|
138 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
139 |
|
140 |
if file_input is None:
|
141 |
-
yield [
|
142 |
return
|
143 |
|
144 |
# Analyze the data
|
145 |
data_description, visualization_paths, target = analyze_data(file_input.name)
|
146 |
|
147 |
if data_description is None:
|
148 |
-
yield [
|
149 |
return
|
150 |
|
151 |
# Construct the prompt for the model
|
@@ -157,18 +178,22 @@ def interact_with_agent(file_input, additional_notes):
|
|
157 |
# Generate summary from the model
|
158 |
summary = generate_summary(prompt)
|
159 |
|
160 |
-
# Prepare chat messages
|
161 |
messages = [
|
162 |
-
|
163 |
-
|
164 |
]
|
165 |
|
166 |
# Append the summary
|
167 |
-
messages.append(
|
168 |
|
169 |
# Append images
|
170 |
for image_path in visualization_paths:
|
171 |
-
|
|
|
|
|
|
|
|
|
172 |
|
173 |
yield messages
|
174 |
|
@@ -181,10 +206,10 @@ with gr.Blocks(
|
|
181 |
) as demo:
|
182 |
gr.Markdown("""# 📊 Data Analyst Assistant
|
183 |
|
184 |
-
|
185 |
|
186 |
-
|
187 |
-
|
188 |
|
189 |
with gr.Row():
|
190 |
file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
|
@@ -194,14 +219,18 @@ with gr.Blocks(
|
|
194 |
)
|
195 |
|
196 |
submit = gr.Button("Run Analysis", variant="primary")
|
197 |
-
chatbot = gr.Chatbot(label="Data Analyst Agent")
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
205 |
|
206 |
# Connect the submit button to the interact_with_agent function
|
207 |
submit.click(
|
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import seaborn as sns
|
9 |
|
10 |
+
# Optional: Uncomment the following lines if you plan to use a gated model in the future
|
11 |
+
# from huggingface_hub import login
|
12 |
+
|
13 |
# Define constants
|
14 |
MODEL_NAME = "gpt2" # Publicly accessible model suitable for CPU
|
15 |
FIGURES_DIR = "./figures"
|
16 |
+
EXAMPLE_DIR = "./example"
|
17 |
+
EXAMPLE_FILE = os.path.join(EXAMPLE_DIR, "titanic.csv")
|
18 |
|
19 |
+
# Ensure the figures and example directories exist
|
20 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
21 |
+
os.makedirs(EXAMPLE_DIR, exist_ok=True)
|
22 |
+
|
23 |
+
# Download the Titanic dataset if it doesn't exist
|
24 |
+
if not os.path.isfile(EXAMPLE_FILE):
|
25 |
+
print("Downloading the Titanic dataset for examples...")
|
26 |
+
try:
|
27 |
+
# Using seaborn's built-in Titanic dataset
|
28 |
+
titanic = sns.load_dataset('titanic')
|
29 |
+
titanic.to_csv(EXAMPLE_FILE, index=False)
|
30 |
+
print(f"Example dataset saved to {EXAMPLE_FILE}.")
|
31 |
+
except Exception as e:
|
32 |
+
print(f"Failed to download the Titanic dataset: {e}")
|
33 |
+
print("Please ensure the 'example/titanic.csv' file exists.")
|
34 |
+
# Optionally, exit or continue without examples
|
35 |
+
# exit(1)
|
36 |
|
37 |
# Initialize tokenizer and model
|
38 |
print("Loading model and tokenizer...")
|
|
|
106 |
try:
|
107 |
data = pd.read_csv(data_file_path)
|
108 |
except Exception as e:
|
109 |
+
return None, f"Error loading CSV file: {e}", None
|
110 |
|
111 |
# Generate data description
|
112 |
data_description = f"- **Data Summary (.describe()):**\n{data.describe().to_markdown()}\n\n"
|
|
|
135 |
plt.figure(figsize=(8, 6))
|
136 |
sns.countplot(x=target, data=data)
|
137 |
plt.title(f"Distribution of {target}")
|
138 |
+
distribution_path = os.path.join(FIGURES_DIR, f"{target}_distribution.png")
|
139 |
+
plt.savefig(distribution_path)
|
140 |
plt.clf()
|
141 |
+
visualization_paths.append(distribution_path)
|
142 |
|
143 |
# Pairplot (limited to first 5 numeric columns for performance)
|
144 |
numeric_cols = data.select_dtypes(include='number').columns[:5]
|
|
|
159 |
os.makedirs(FIGURES_DIR, exist_ok=True)
|
160 |
|
161 |
if file_input is None:
|
162 |
+
yield [{"role": "assistant", "content": "❌ No file uploaded. Please upload a CSV file to proceed."}]
|
163 |
return
|
164 |
|
165 |
# Analyze the data
|
166 |
data_description, visualization_paths, target = analyze_data(file_input.name)
|
167 |
|
168 |
if data_description is None:
|
169 |
+
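yield [{"role": "assistant", "content": data_description}] # NOTE(review): data_description is None on this path; analyze_data's error return places the message in the second tuple slot (bound to visualization_paths here)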
|
170 |
return
|
171 |
|
172 |
# Construct the prompt for the model
|
|
|
178 |
# Generate summary from the model
|
179 |
summary = generate_summary(prompt)
|
180 |
|
181 |
+
# Prepare chat messages in 'messages' format
|
182 |
messages = [
|
183 |
+
{"role": "user", "content": "I have uploaded a CSV file for analysis."},
|
184 |
+
{"role": "assistant", "content": "⏳ _Analyzing the data..._"}
|
185 |
]
|
186 |
|
187 |
# Append the summary
|
188 |
+
messages.append({"role": "assistant", "content": summary})
|
189 |
|
190 |
# Append images
|
191 |
for image_path in visualization_paths:
|
192 |
+
# Ensure the image path is valid before attempting to display
|
193 |
+
if os.path.isfile(image_path):
|
194 |
+
messages.append({"role": "assistant", "content": f""})
|
195 |
+
else:
|
196 |
+
messages.append({"role": "assistant", "content": f"⚠️ Unable to find image: {image_path}"})
|
197 |
|
198 |
yield messages
|
199 |
|
|
|
206 |
) as demo:
|
207 |
gr.Markdown("""# 📊 Data Analyst Assistant
|
208 |
|
209 |
+
Upload a `.csv` file, add any additional notes, and **the assistant will analyze the data and generate visualizations and insights for you!**
|
210 |
|
211 |
+
**Example:** [Titanic Dataset](./example/titanic.csv)
|
212 |
+
""")
|
213 |
|
214 |
with gr.Row():
|
215 |
file_input = gr.File(label="Upload CSV File", file_types=[".csv"])
|
|
|
219 |
)
|
220 |
|
221 |
submit = gr.Button("Run Analysis", variant="primary")
|
222 |
+
chatbot = gr.Chatbot(label="Data Analyst Agent", type='messages', height=500)
|
223 |
+
|
224 |
+
# Handle examples only if the example file exists
|
225 |
+
if os.path.isfile(EXAMPLE_FILE):
|
226 |
+
gr.Examples(
|
227 |
+
examples=[[EXAMPLE_FILE, example_notes]],
|
228 |
+
inputs=[file_input, text_input],
|
229 |
+
label="Examples",
|
230 |
+
cache_examples=False
|
231 |
+
)
|
232 |
+
else:
|
233 |
+
gr.Markdown("**No example files available.** Please upload your own CSV files.")
|
234 |
|
235 |
# Connect the submit button to the interact_with_agent function
|
236 |
submit.click(
|