Spaces:

GMARTINEZMILLA
/

Final_Project

Sleeping

App Files Files Community

GMARTINEZMILLA committed on Oct 16, 2024

Commit

047c64c

1 Parent(s): c70eeb5

feat: updated website

Browse files

Files changed (1) hide show

app.py +350 -106

app.py CHANGED Viewed

@@ -241,6 +241,7 @@ if page == "Summary":
                         )}
                     )
 # Customer Analysis Page
 elif page == "Customer Analysis":
     st.markdown("""
     <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
@@ -265,7 +266,6 @@ elif page == "Customer Analysis":
                 customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
                 time.sleep(1)
                 if not customer_match.empty:
                     cluster = customer_match['cluster_id'].values[0]
@@ -313,121 +313,103 @@ elif page == "Customer Analysis":
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
                     if not actual_sales.empty:
                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
-                        results['ventas_reales'].fillna(0, inplace=True)
-                        # st.write("### Final Results DataFrame:")
-                        # st.write(results.head())
-                        # st.write(f"Shape: {results.shape}")
-                        # Calculate metrics only for non-null actual sales
-                        valid_results = results.dropna(subset=['ventas_reales'])
-                        non_zero_actuals = valid_results[valid_results['ventas_reales'] != 0]
-                        if not valid_results.empty:
-                            mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
-                            mape = np.mean(np.abs((non_zero_actuals['ventas_reales'] - non_zero_actuals['ventas_predichas']) / non_zero_actuals['ventas_reales'])) * 100
-                            rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
-                            # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
-                            # st.write(f"MAE: {mae:.2f}€")
-                            # st.write(f"MAPE: {mape:.2f}%")
-                            # st.write(f"RMSE: {rmse:.2f}")
-                    #     # Analysis of results
-                    #     threshold_good = 100  # You may want to adjust this threshold
-                    #     if mae < threshold_good:
-                    #         st.success(f"Customer {customer_code} is performing well based on the predictions.")
-                    #     else:
-                    #         st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
-                    # else:
-                    #     st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
-                    # st.write("### Debug Information for Radar Chart:")
-                    # st.write(f"Shape of customer_data: {customer_data.shape}")
-                    # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
-                    # Get percentage of units sold for each manufacturer
-                    customer_df = df[df["CLIENTE"] == str(customer_code)]  # Get the customer data
-                    all_manufacturers = customer_df.iloc[:, 1:].T  # Exclude CLIENTE column (manufacturers are in columns)
-                    all_manufacturers.index = all_manufacturers.index.astype(str)
-                    # Get total sales for each manufacturer from euros_proveedor
-                    customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
-                    sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
-                    sales_data.index = sales_data.index.astype(str)
-                    # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
-                    sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
-                    # Ensure all values are numeric
-                    sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
-                    all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
-                    # Sort manufacturers by percentage of units and get top 10
-                    top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
-                    # Sort manufacturers by total sales and get top 10
-                    top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
-                    # Combine top manufacturers from both lists and get up to 20 unique manufacturers
-                    combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
-                    # Filter out manufacturers that are not present in both datasets
-                    combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
-                    # st.write(f"Number of combined top manufacturers: {len(combined_top)}")
-                    if combined_top:
-                        # Create a DataFrame with combined data for these top manufacturers
-                        combined_data = pd.DataFrame({
-                            'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
-                            'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
-                        }).fillna(0)
-                        # Sort by units, then by sales
-                        combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
-                        # Filter out manufacturers with 0 units
-                        non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
-                        # If we have less than 3 non-zero manufacturers, add some zero-value ones
-                        if len(non_zero_manufacturers) < 3:
-                            zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
-                            manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
-                        else:
-                            manufacturers_to_show = non_zero_manufacturers
-                        values = manufacturers_to_show['units'].tolist()
-                        amounts = manufacturers_to_show['sales'].tolist()
-                        manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
-                        # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
-                        # for manufacturer, value, amount in zip(manufacturers, values, amounts):
-                        #     (f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
-                        if manufacturers:  # Only create the chart if we have data
-                            fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
-                            st.pyplot(fig)
                         else:
-                            st.warning("No data available to create the radar chart.")
-                    else:
-                        st.warning("No combined top manufacturers found.")
-                    # Ensure codigo_cliente in ventas_clientes is a string
-                    ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
-                    # Ensure customer_code is a string and strip any spaces
-                    customer_code = str(customer_code).strip()
-                    # if customer_code in ventas_clientes['codigo_cliente'].unique():
-                    #     (f"Customer {customer_code} found in ventas_clientes")
-                    # else:
-                    #     (f"Customer {customer_code} not found in ventas_clientes")
-                    # Customer sales 2021-2024 (if data exists)
                     sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
                     if all(col in ventas_clientes.columns for col in sales_columns):
                         customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
@@ -442,13 +424,13 @@ elif page == "Customer Analysis":
                                 actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
                                 predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
-                                # Estimate full-year predicted sales (assuming predictions available until September)
                                 months_available = 9  # Data available until September
                                 actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
                                 # Add 2024 actual and predicted sales
-                                sales_values = list(customer_sales) + [actual_sales_2024_annual]  # Actual sales
-                                predicted_values = list(customer_sales) + [predicted_sales_2024]  # Predicted sales
                                 # Add 2024 to the years list
                                 years.append('2024')
@@ -502,6 +484,268 @@ elif page == "Customer Analysis":
                         st.warning("Sales data for 2021-2023 not available in the dataset.")
 # Customer Recommendations Page
 elif page == "Articles Recommendations":
     st.title("Articles Recommendations")

                         )}
                     )
 # Customer Analysis Page
 elif page == "Customer Analysis":
     st.markdown("""
     <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
                 customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
                 time.sleep(1)
                 if not customer_match.empty:
                     cluster = customer_match['cluster_id'].values[0]
                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
                     if not actual_sales.empty:
+                        # Merge predictions with actual sales
                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
                                                 how='left')
                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
+                    else:
+                        # If no actual sales data for 2024, fill 'ventas_reales' with 0
+                        results['ventas_reales'] = 0
+                    # Ensure any missing sales data is filled with 0
+                    results['ventas_reales'].fillna(0, inplace=True)
+                    # Split space into two columns
+                    col1, col2 = st.columns(2)
+                    # Column 1: Radar chart for top manufacturers
+                    with col1:
+                        # Radar chart logic remains the same
+                        customer_df = df[df["CLIENTE"] == str(customer_code)]
+                        all_manufacturers = customer_df.iloc[:, 1:].T
+                        all_manufacturers.index = all_manufacturers.index.astype(str)
+                        customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
+                        sales_data = customer_euros.iloc[:, 1:].T
+                        sales_data.index = sales_data.index.astype(str)
+                        sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
+                        sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
+                        all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
+                        top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
+                        top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
+                        combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
+                        combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
+                        if combined_top:
+                            combined_data = pd.DataFrame({
+                                'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
+                                'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
+                            }).fillna(0)
+                            combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
+                            non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
+                            if len(non_zero_manufacturers) < 3:
+                                zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
+                                manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
+                            else:
+                                manufacturers_to_show = non_zero_manufacturers
+                            values = manufacturers_to_show['units'].tolist()
+                            amounts = manufacturers_to_show['sales'].tolist()
+                            manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
+                            if manufacturers:
+                                fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
+                                st.pyplot(fig)
+                    # Column 2: Alerts and additional analysis
+                    with col2:
+                        st.markdown(f"### Alerts for {customer_code}")
+                        # Identify manufacturers that didn't meet predicted sales
+                        underperforming_manufacturers = results[results['ventas_reales'] < results['ventas_predichas']]
+                        if not underperforming_manufacturers.empty:
+                            st.warning("Some manufacturers have not met predicted sales:")
+                            for index, row in underperforming_manufacturers.iterrows():
+                                manufacturer_name = get_supplier_name(row['marca_id_encoded'])
+                                predicted = row['ventas_predichas']
+                                actual = row['ventas_reales']
+                                delta = predicted - actual
+                                st.write(f"- {manufacturer_name}: Predicted = {predicted:.2f}€, Actual = {actual:.2f}€, Missed = {delta:.2f}€")
                         else:
+                            st.success("All manufacturers have met or exceeded predicted sales.")
+                        # Additional chart: compare predicted and actual sales for the top manufacturers
+                        st.markdown("### Predicted vs Actual Sales for Top Manufacturers")
+                        top_manufacturers = results.groupby('marca_id_encoded').agg({'ventas_reales': 'sum', 'ventas_predichas': 'sum'}).sort_values(by='ventas_reales', ascending=False).head(10)
+                        fig_comparison = go.Figure()
+                        fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_reales'], name="Actual Sales", marker_color='blue'))
+                        fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_predichas'], name="Predicted Sales", marker_color='orange'))
+                        fig_comparison.update_layout(
+                            title="Actual vs Predicted Sales by Top Manufacturers",
+                            xaxis_title="Manufacturer",
+                            yaxis_title="Sales (€)",
+                            barmode='group',
+                            height=400,
+                            hovermode="x unified"
+                        )
+                        st.plotly_chart(fig_comparison, use_container_width=True)
+                    # Ensure customer sales (2021-2024)
                     sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
                     if all(col in ventas_clientes.columns for col in sales_columns):
                         customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
                                 actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
                                 predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
+                                # Annualize the actual 2024 sales observed so far into a full-year estimate
                                 months_available = 9  # Data available until September
                                 actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
                                 # Add 2024 actual and predicted sales
+                                sales_values = list(customer_sales) + [actual_sales_2024_annual]
+                                predicted_values = list(customer_sales) + [predicted_sales_2024]
                                 # Add 2024 to the years list
                                 years.append('2024')
                         st.warning("Sales data for 2021-2023 not available in the dataset.")
+# elif page == "Customer Analysis":
+#     st.markdown("""
+#     <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
+#     <p style='text-align: center; font-size: 1.2rem; color: gray;'>
+#     Enter the customer code to explore detailed customer insights,
+#     including past sales, predictions for the current year, and manufacturer-specific information.
+#     </p>
+#     """, unsafe_allow_html=True)
+#     # Combine text input and dropdown into a single searchable selectbox
+#     customer_code = st.selectbox(
+#         "Search and Select Customer Code",
+#         df['CLIENTE'].unique(),  # All customer codes
+#         format_func=lambda x: str(x),  # Ensures the values are displayed as strings
+#         help="Start typing to search for a specific customer code"
+#     )
+#     if st.button("Calcular"):
+#         if customer_code:
+#             with st.spinner("We are identifying the customer's cluster..."):
+#                 # Find Customer's Cluster
+#                 customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
+#                 time.sleep(1)
+#                 if not customer_match.empty:
+#                     cluster = customer_match['cluster_id'].values[0]
+#             with st.spinner(f"Selecting predictive model..."):
+#                 # Load the Corresponding Model
+#                 model_path = f'models/modelo_cluster_{cluster}.txt'
+#                 gbm = lgb.Booster(model_file=model_path)
+#             with st.spinner("Getting the data ready..."):
+#                 # Load predict data for that cluster
+#                 predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
+#                 # Convert cliente_id to string
+#                 predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
+#             with st.spinner("Filtering data..."):
+#                 # Filter for the specific customer
+#                 customer_code_str = str(customer_code)
+#                 customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
+#             with st.spinner("Generating sales predictions..."):
+#                 if not customer_data.empty:
+#                     # Define features consistently with the training process
+#                     lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
+#                     features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
+#                     # Prepare data for prediction
+#                     X_predict = customer_data[features]
+#                     # Convert categorical features to 'category' dtype
+#                     categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
+#                     for feature in categorical_features:
+#                         X_predict[feature] = X_predict[feature].astype('category')
+#                     # Make Prediction for the selected customer
+#                     y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
+#                     # Reassemble the results
+#                     results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
+#                     results['ventas_predichas'] = y_pred
+#                     # Load actual data
+#                     actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
+#                     if not actual_sales.empty:
+#                         results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
+#                                                 on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
+#                                                 how='left')
+#                         results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
+#                         results['ventas_reales'].fillna(0, inplace=True)
+#                         # st.write("### Final Results DataFrame:")
+#                         # st.write(results.head())
+#                         # st.write(f"Shape: {results.shape}")
+#                         # Calculate metrics only for non-null actual sales
+#                         valid_results = results.dropna(subset=['ventas_reales'])
+#                         non_zero_actuals = valid_results[valid_results['ventas_reales'] != 0]
+#                         if not valid_results.empty:
+#                             mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
+#                             mape = np.mean(np.abs((non_zero_actuals['ventas_reales'] - non_zero_actuals['ventas_predichas']) / non_zero_actuals['ventas_reales'])) * 100
+#                             rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
+#                             # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
+#                             # st.write(f"MAE: {mae:.2f}€")
+#                             # st.write(f"MAPE: {mape:.2f}%")
+#                             # st.write(f"RMSE: {rmse:.2f}")
+#                     #     # Analysis of results
+#                     #     threshold_good = 100  # You may want to adjust this threshold
+#                     #     if mae < threshold_good:
+#                     #         st.success(f"Customer {customer_code} is performing well based on the predictions.")
+#                     #     else:
+#                     #         st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
+#                     # else:
+#                     #     st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
+#                     # st.write("### Debug Information for Radar Chart:")
+#                     # st.write(f"Shape of customer_data: {customer_data.shape}")
+#                     # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
+#                     # Get percentage of units sold for each manufacturer
+#                     customer_df = df[df["CLIENTE"] == str(customer_code)]  # Get the customer data
+#                     all_manufacturers = customer_df.iloc[:, 1:].T  # Exclude CLIENTE column (manufacturers are in columns)
+#                     all_manufacturers.index = all_manufacturers.index.astype(str)
+#                     # Get total sales for each manufacturer from euros_proveedor
+#                     customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
+#                     sales_data = customer_euros.iloc[:, 1:].T  # Exclude CLIENTE column
+#                     sales_data.index = sales_data.index.astype(str)
+#                     # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
+#                     sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
+#                     # Ensure all values are numeric
+#                     sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
+#                     all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
+#                     # Sort manufacturers by percentage of units and get top 10
+#                     top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
+#                     # Sort manufacturers by total sales and get top 10
+#                     top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
+#                     # Combine top manufacturers from both lists and get up to 20 unique manufacturers
+#                     combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
+#                     # Filter out manufacturers that are not present in both datasets
+#                     combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
+#                     # st.write(f"Number of combined top manufacturers: {len(combined_top)}")
+#                     if combined_top:
+#                         # Create a DataFrame with combined data for these top manufacturers
+#                         combined_data = pd.DataFrame({
+#                             'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
+#                             'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
+#                         }).fillna(0)
+#                         # Sort by units, then by sales
+#                         combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
+#                         # Filter out manufacturers with 0 units
+#                         non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
+#                         # If we have less than 3 non-zero manufacturers, add some zero-value ones
+#                         if len(non_zero_manufacturers) < 3:
+#                             zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
+#                             manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
+#                         else:
+#                             manufacturers_to_show = non_zero_manufacturers
+#                         values = manufacturers_to_show['units'].tolist()
+#                         amounts = manufacturers_to_show['sales'].tolist()
+#                         manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
+#                         # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
+#                         # for manufacturer, value, amount in zip(manufacturers, values, amounts):
+#                         #     (f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
+#                         if manufacturers:  # Only create the chart if we have data
+#                             fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
+#                             st.pyplot(fig)
+#                         else:
+#                             st.warning("No data available to create the radar chart.")
+#                     else:
+#                         st.warning("No combined top manufacturers found.")
+#                     # Ensure codigo_cliente in ventas_clientes is a string
+#                     ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
+#                     # Ensure customer_code is a string and strip any spaces
+#                     customer_code = str(customer_code).strip()
+#                     # if customer_code in ventas_clientes['codigo_cliente'].unique():
+#                     #     (f"Customer {customer_code} found in ventas_clientes")
+#                     # else:
+#                     #     (f"Customer {customer_code} not found in ventas_clientes")
+#                     # Customer sales 2021-2024 (if data exists)
+#                     sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
+#                     if all(col in ventas_clientes.columns for col in sales_columns):
+#                         customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
+#                         if not customer_sales_data.empty:
+#                             customer_sales = customer_sales_data[sales_columns].values[0]
+#                             years = ['2021', '2022', '2023']
+#                             # Add the 2024 actual and predicted data
+#                             if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
+#                                 # Get the actual and predicted sales for 2024
+#                                 actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
+#                                 predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
+#                                 # Estimate full-year predicted sales (assuming predictions available until September)
+#                                 months_available = 9  # Data available until September
+#                                 actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
+#                                 # Add 2024 actual and predicted sales
+#                                 sales_values = list(customer_sales) + [actual_sales_2024_annual]  # Actual sales
+#                                 predicted_values = list(customer_sales) + [predicted_sales_2024]  # Predicted sales
+#                                 # Add 2024 to the years list
+#                                 years.append('2024')
+#                                 fig_sales_bar = go.Figure()
+#                                 # Add trace for historical sales (2021-2023)
+#                                 fig_sales_bar.add_trace(go.Bar(
+#                                     x=years[:3],  # 2021, 2022, 2023
+#                                     y=sales_values[:3],
+#                                     name="Historical Sales",
+#                                     marker_color='blue'
+#                                 ))
+#                                 # Add trace for 2024 actual sales
+#                                 fig_sales_bar.add_trace(go.Bar(
+#                                     x=[years[3]],  # 2024
+#                                     y=[sales_values[3]],
+#                                     name="2024 Actual Sales (Annualized)",
+#                                     marker_color='green'
+#                                 ))
+#                                 # Add trace for 2024 predicted sales
+#                                 fig_sales_bar.add_trace(go.Bar(
+#                                     x=[years[3]],  # 2024
+#                                     y=[predicted_values[3]],
+#                                     name="2024 Predicted Sales",
+#                                     marker_color='orange'
+#                                 ))
+#                                 # Update layout
+#                                 fig_sales_bar.update_layout(
+#                                     title=f"Sales Over the Years for Customer {customer_code}",
+#                                     xaxis_title="Year",
+#                                     yaxis_title="Sales (€)",
+#                                     barmode='group',
+#                                     height=600,
+#                                     legend_title_text="Sales Type",
+#                                     hovermode="x unified"
+#                                 )
+#                                 # Show the interactive bar chart in Streamlit
+#                                 st.plotly_chart(fig_sales_bar, use_container_width=True)
+#                             else:
+#                                 st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")
+#                         else:
+#                             st.warning(f"No historical sales data found for customer {customer_code}")
+#                     else:
+#                         st.warning("Sales data for 2021-2023 not available in the dataset.")
 # Customer Recommendations Page
 elif page == "Articles Recommendations":
     st.title("Articles Recommendations")