Spaces:
Sleeping
Sleeping
GMARTINEZMILLA
committed on
Commit
•
801e1c6
1
Parent(s):
3ac8646
feat: updated website script and requirements
Browse files- app.py +96 -57
- requirements.txt +6 -1
app.py
CHANGED
@@ -13,6 +13,7 @@ st.set_page_config(page_title="Customer Insights App", page_icon=":bar_chart:")
|
|
13 |
df = pd.read_csv("df_clean.csv")
|
14 |
nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
|
15 |
euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
|
|
|
16 |
|
17 |
# Ensure customer codes are strings
|
18 |
df['CLIENTE'] = df['CLIENTE'].astype(str)
|
@@ -119,84 +120,122 @@ elif page == "Customer Analysis":
|
|
119 |
customer_data = df[df["CLIENTE"] == str(customer_code)]
|
120 |
customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
|
121 |
|
|
|
122 |
if not customer_data.empty and not customer_euros.empty:
|
123 |
st.write(f"### Analysis for Customer {customer_code}")
|
124 |
|
125 |
-
#
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
-
|
143 |
-
|
|
|
144 |
|
145 |
-
|
146 |
-
|
147 |
|
148 |
-
|
149 |
-
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
}).fillna(0)
|
156 |
|
157 |
-
|
158 |
-
|
|
|
|
|
159 |
|
160 |
-
|
161 |
-
|
162 |
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
manufacturers_to_show = non_zero_manufacturers
|
169 |
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
|
190 |
-
|
191 |
-
|
192 |
-
|
|
|
|
|
193 |
else:
|
194 |
-
st.warning("
|
195 |
else:
|
196 |
st.warning(f"No data found for customer {customer_code}. Please check the code.")
|
197 |
else:
|
198 |
st.warning("Please select a customer.")
|
199 |
|
|
|
200 |
# Articles Recommendations Page
|
201 |
elif page == "Articles Recommendations":
|
202 |
st.title("Articles Recommendations")
|
|
|
13 |
df = pd.read_csv("df_clean.csv")
|
14 |
nombres_proveedores = pd.read_csv("nombres_proveedores.csv", sep=';')
|
15 |
euros_proveedor = pd.read_csv("euros_proveedor.csv", sep=',')
|
16 |
+
ventas_clientes = pd.read_csv("ventas_clientes.csv", sep=',')
|
17 |
|
18 |
# Ensure customer codes are strings
|
19 |
df['CLIENTE'] = df['CLIENTE'].astype(str)
|
|
|
120 |
customer_data = df[df["CLIENTE"] == str(customer_code)]
|
121 |
customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
|
122 |
|
123 |
+
# Check if customer data exists
|
124 |
if not customer_data.empty and not customer_euros.empty:
|
125 |
st.write(f"### Analysis for Customer {customer_code}")
|
126 |
|
127 |
+
# **Step 1: Find Customer's Cluster**
|
128 |
+
customer_clusters = pd.read_csv('predicts/customer_clusters.csv')
|
129 |
+
cluster = customer_clusters[customer_clusters['cliente_id'] == customer_code]['cluster_id'].values[0]
|
130 |
+
st.write(f"Customer {customer_code} belongs to cluster {cluster}")
|
131 |
+
|
132 |
+
# **Step 2: Load the Corresponding Model**
|
133 |
+
model_path = f'models/modelo_cluster_{cluster}.txt'
|
134 |
+
gbm = lgb.Booster(model_file=model_path)
|
135 |
+
st.write(f"Loaded model for cluster {cluster}")
|
136 |
+
|
137 |
+
# **Step 3: Load X_predict for that cluster and extract customer-specific data**
|
138 |
+
X_predict_cluster = pd.read_csv(f'predicts/X_predict_cluster_{cluster}.csv')
|
139 |
+
X_cliente = X_predict_cluster[X_predict_cluster['cliente_id'] == customer_code]
|
140 |
+
|
141 |
+
if not X_cliente.empty:
|
142 |
+
# **Step 4: Make Prediction for the selected customer**
|
143 |
+
y_pred = gbm.predict(X_cliente.drop(columns=['cliente_id']), num_iteration=gbm.best_iteration)
|
144 |
+
st.write(f"Predicted sales for Customer {customer_code}: {y_pred[0]:.2f}")
|
145 |
+
|
146 |
+
# **Step 5: Merge with actual data from df_agg_2024**
|
147 |
+
df_agg_2024 = pd.read_csv('predicts/df_agg_2024.csv')
|
148 |
+
actual_sales = df_agg_2024[(df_agg_2024['cliente_id'] == customer_code) & (df_agg_2024['marca_id_encoded'].isin(X_cliente['marca_id_encoded']))]
|
149 |
+
if not actual_sales.empty:
|
150 |
+
merged_data = pd.merge(
|
151 |
+
pd.DataFrame({'cliente_id': [customer_code], 'ventas_predichas': y_pred}),
|
152 |
+
actual_sales[['cliente_id', 'marca_id_encoded', 'precio_total']],
|
153 |
+
on='cliente_id',
|
154 |
+
how='left'
|
155 |
+
)
|
156 |
+
merged_data.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
|
157 |
+
|
158 |
+
# Calculate metrics (MAE, MAPE, RMSE, SMAPE)
|
159 |
+
mae = mean_absolute_error(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
160 |
+
mape = np.mean(np.abs((merged_data['ventas_reales'] - merged_data['ventas_predichas']) / merged_data['ventas_reales'])) * 100
|
161 |
+
rmse = np.sqrt(mean_squared_error(merged_data['ventas_reales'], merged_data['ventas_predichas']))
|
162 |
+
smape_value = smape(merged_data['ventas_reales'], merged_data['ventas_predichas'])
|
163 |
+
|
164 |
+
st.write(f"MAE: {mae:.2f}")
|
165 |
+
st.write(f"MAPE: {mape:.2f}%")
|
166 |
+
st.write(f"RMSE: {rmse:.2f}")
|
167 |
+
st.write(f"SMAPE: {smape_value:.2f}%")
|
168 |
+
|
169 |
+
# **Step 6: Analysis of results (show insights if the customer is performing well or not)**
|
170 |
+
if mae < threshold_good:
|
171 |
+
st.success(f"Customer {customer_code} is performing well based on the predictions.")
|
172 |
+
else:
|
173 |
+
st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
|
174 |
+
else:
|
175 |
+
st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
|
176 |
|
177 |
+
# **Show the radar chart**
|
178 |
+
all_manufacturers = customer_data.iloc[:, 1:].T # Exclude CLIENTE column
|
179 |
+
all_manufacturers.index = all_manufacturers.index.astype(str)
|
180 |
|
181 |
+
sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
|
182 |
+
sales_data.index = sales_data.index.astype(str)
|
183 |
|
184 |
+
sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
|
185 |
+
sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
|
186 |
|
187 |
+
top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
|
188 |
+
top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
|
189 |
+
combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
|
190 |
+
combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
|
|
|
191 |
|
192 |
+
combined_data = pd.DataFrame({
|
193 |
+
'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
|
194 |
+
'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
|
195 |
+
}).fillna(0)
|
196 |
|
197 |
+
combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
|
198 |
+
non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
|
199 |
|
200 |
+
if len(non_zero_manufacturers) < 3:
|
201 |
+
zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
|
202 |
+
manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
|
203 |
+
else:
|
204 |
+
manufacturers_to_show = non_zero_manufacturers
|
|
|
205 |
|
206 |
+
values = manufacturers_to_show['units'].tolist()
|
207 |
+
amounts = manufacturers_to_show['sales'].tolist()
|
208 |
+
manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
|
209 |
|
210 |
+
st.write(f"### Results for top {len(manufacturers)} manufacturers:")
|
211 |
+
for manufacturer, value, amount in zip(manufacturers, values, amounts):
|
212 |
+
st.write(f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
|
213 |
|
214 |
+
if manufacturers:
|
215 |
+
fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
|
216 |
+
st.pyplot(fig)
|
217 |
+
else:
|
218 |
+
st.warning("No data available to create the radar chart.")
|
219 |
|
220 |
+
# **Show sales over the years graph**
|
221 |
+
sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
|
222 |
+
if all(col in ventas_clientes.columns for col in sales_columns):
|
223 |
+
years = ['2021', '2022', '2023']
|
224 |
+
customer_sales = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code][sales_columns].values[0]
|
225 |
|
226 |
+
fig_sales = px.line(x=years, y=customer_sales, markers=True, title=f'Sales Over the Years for Customer {customer_code}')
|
227 |
+
fig_sales.update_layout(xaxis_title="Year", yaxis_title="Sales")
|
228 |
+
st.plotly_chart(fig_sales)
|
229 |
+
else:
|
230 |
+
st.warning("Sales data for 2021-2023 not available.")
|
231 |
else:
|
232 |
+
st.warning(f"No prediction data found for customer {customer_code}.")
|
233 |
else:
|
234 |
st.warning(f"No data found for customer {customer_code}. Please check the code.")
|
235 |
else:
|
236 |
st.warning("Please select a customer.")
|
237 |
|
238 |
+
|
239 |
# Articles Recommendations Page
|
240 |
elif page == "Articles Recommendations":
|
241 |
st.title("Articles Recommendations")
|
requirements.txt
CHANGED
@@ -1,3 +1,8 @@
|
|
1 |
plotly
|
2 |
matplotlib
|
3 |
-
scikit-learn
|
|
|
|
|
|
|
|
|
|
|
|
1 |
plotly
|
2 |
matplotlib
|
3 |
+
scikit-learn
|
4 |
+
streamlit
|
5 |
+
lightgbm
|
6 |
+
pandas
|
7 |
+
numpy
|
8 |
+
joblib
|