GMARTINEZMILLA committed on
Commit
047c64c
·
1 Parent(s): c70eeb5

feat: updated website

Browse files
Files changed (1) hide show
  1. app.py +350 -106
app.py CHANGED
@@ -241,6 +241,7 @@ if page == "Summary":
241
  )}
242
  )
243
  # Customer Analysis Page
 
244
  elif page == "Customer Analysis":
245
  st.markdown("""
246
  <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
@@ -265,7 +266,6 @@ elif page == "Customer Analysis":
265
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
266
  time.sleep(1)
267
 
268
-
269
  if not customer_match.empty:
270
  cluster = customer_match['cluster_id'].values[0]
271
 
@@ -313,121 +313,103 @@ elif page == "Customer Analysis":
313
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
314
 
315
  if not actual_sales.empty:
 
316
  results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
317
  on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
318
  how='left')
319
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
320
- results['ventas_reales'].fillna(0, inplace=True)
321
- # st.write("### Final Results DataFrame:")
322
- # st.write(results.head())
323
- # st.write(f"Shape: {results.shape}")
324
-
325
- # Calculate metrics only for non-null actual sales
326
- valid_results = results.dropna(subset=['ventas_reales'])
327
- non_zero_actuals = valid_results[valid_results['ventas_reales'] != 0]
328
- if not valid_results.empty:
329
- mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
330
- mape = np.mean(np.abs((non_zero_actuals['ventas_reales'] - non_zero_actuals['ventas_predichas']) / non_zero_actuals['ventas_reales'])) * 100
331
- rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
332
-
333
- # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
334
- # st.write(f"MAE: {mae:.2f}€")
335
- # st.write(f"MAPE: {mape:.2f}%")
336
- # st.write(f"RMSE: {rmse:.2f}")
337
-
338
- # # Analysis of results
339
- # threshold_good = 100 # You may want to adjust this threshold
340
- # if mae < threshold_good:
341
- # st.success(f"Customer {customer_code} is performing well based on the predictions.")
342
- # else:
343
- # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
344
- # else:
345
- # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
346
-
347
- # st.write("### Debug Information for Radar Chart:")
348
- # st.write(f"Shape of customer_data: {customer_data.shape}")
349
- # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
350
-
351
- # Get percentage of units sold for each manufacturer
352
- customer_df = df[df["CLIENTE"] == str(customer_code)] # Get the customer data
353
- all_manufacturers = customer_df.iloc[:, 1:].T # Exclude CLIENTE column (manufacturers are in columns)
354
- all_manufacturers.index = all_manufacturers.index.astype(str)
355
-
356
- # Get total sales for each manufacturer from euros_proveedor
357
- customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
358
- sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
359
- sales_data.index = sales_data.index.astype(str)
360
-
361
- # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
362
- sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
363
-
364
- # Ensure all values are numeric
365
- sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
366
- all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
367
-
368
- # Sort manufacturers by percentage of units and get top 10
369
- top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
370
-
371
- # Sort manufacturers by total sales and get top 10
372
- top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
373
-
374
- # Combine top manufacturers from both lists and get up to 20 unique manufacturers
375
- combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
376
-
377
- # Filter out manufacturers that are not present in both datasets
378
- combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
379
-
380
- # st.write(f"Number of combined top manufacturers: {len(combined_top)}")
381
-
382
- if combined_top:
383
- # Create a DataFrame with combined data for these top manufacturers
384
- combined_data = pd.DataFrame({
385
- 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
386
- 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
387
- }).fillna(0)
388
-
389
- # Sort by units, then by sales
390
- combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
391
-
392
- # Filter out manufacturers with 0 units
393
- non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
394
-
395
- # If we have less than 3 non-zero manufacturers, add some zero-value ones
396
- if len(non_zero_manufacturers) < 3:
397
- zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
398
- manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
399
- else:
400
- manufacturers_to_show = non_zero_manufacturers
401
 
402
- values = manufacturers_to_show['units'].tolist()
403
- amounts = manufacturers_to_show['sales'].tolist()
404
- manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
405
 
406
- # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
407
- # for manufacturer, value, amount in zip(manufacturers, values, amounts):
408
- # (f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
409
 
410
- if manufacturers: # Only create the chart if we have data
411
- fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
412
- st.pyplot(fig)
 
 
 
 
 
 
 
 
 
 
 
 
413
  else:
414
- st.warning("No data available to create the radar chart.")
415
 
416
- else:
417
- st.warning("No combined top manufacturers found.")
 
418
 
419
- # Ensure codigo_cliente in ventas_clientes is a string
420
- ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
 
421
 
422
- # Ensure customer_code is a string and strip any spaces
423
- customer_code = str(customer_code).strip()
 
 
 
 
 
 
424
 
425
- # if customer_code in ventas_clientes['codigo_cliente'].unique():
426
- # (f"Customer {customer_code} found in ventas_clientes")
427
- # else:
428
- # (f"Customer {customer_code} not found in ventas_clientes")
429
 
430
- # Customer sales 2021-2024 (if data exists)
431
  sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
432
  if all(col in ventas_clientes.columns for col in sales_columns):
433
  customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
@@ -442,13 +424,13 @@ elif page == "Customer Analysis":
442
  actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
443
  predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
444
 
445
- # Estimate full-year predicted sales (assuming predictions available until September)
446
  months_available = 9 # Data available until September
447
  actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
448
 
449
  # Add 2024 actual and predicted sales
450
- sales_values = list(customer_sales) + [actual_sales_2024_annual] # Actual sales
451
- predicted_values = list(customer_sales) + [predicted_sales_2024] # Predicted sales
452
 
453
  # Add 2024 to the years list
454
  years.append('2024')
@@ -502,6 +484,268 @@ elif page == "Customer Analysis":
502
  st.warning("Sales data for 2021-2023 not available in the dataset.")
503
 
504
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
505
  # Customer Recommendations Page
506
  elif page == "Articles Recommendations":
507
  st.title("Articles Recommendations")
 
241
  )}
242
  )
243
  # Customer Analysis Page
244
+
245
  elif page == "Customer Analysis":
246
  st.markdown("""
247
  <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
 
266
  customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
267
  time.sleep(1)
268
 
 
269
  if not customer_match.empty:
270
  cluster = customer_match['cluster_id'].values[0]
271
 
 
313
  actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
314
 
315
  if not actual_sales.empty:
316
+ # Merge predictions with actual sales
317
  results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
318
  on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
319
  how='left')
320
  results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
321
+ else:
322
+ # If no actual sales data for 2024, fill 'ventas_reales' with 0
323
+ results['ventas_reales'] = 0
324
+
325
+ # Ensure any missing sales data is filled with 0
326
+ results['ventas_reales'].fillna(0, inplace=True)
327
+
328
+ # Split space into two columns
329
+ col1, col2 = st.columns(2)
330
+
331
+ # Column 1: Radar chart for top manufacturers
332
+ with col1:
333
+ # Radar chart logic remains the same
334
+ customer_df = df[df["CLIENTE"] == str(customer_code)]
335
+ all_manufacturers = customer_df.iloc[:, 1:].T
336
+ all_manufacturers.index = all_manufacturers.index.astype(str)
337
+
338
+ customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
339
+ sales_data = customer_euros.iloc[:, 1:].T
340
+ sales_data.index = sales_data.index.astype(str)
341
+
342
+ sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
343
+ sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
344
+ all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
345
+
346
+ top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
347
+ top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
348
+ combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
349
+
350
+ combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
351
+
352
+ if combined_top:
353
+ combined_data = pd.DataFrame({
354
+ 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
355
+ 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
356
+ }).fillna(0)
357
+
358
+ combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
359
+ non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
360
+
361
+ if len(non_zero_manufacturers) < 3:
362
+ zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
363
+ manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
364
+ else:
365
+ manufacturers_to_show = non_zero_manufacturers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ values = manufacturers_to_show['units'].tolist()
368
+ amounts = manufacturers_to_show['sales'].tolist()
369
+ manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
370
 
371
+ if manufacturers:
372
+ fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
373
+ st.pyplot(fig)
374
 
375
+ # Column 2: Alerts and additional analysis
376
+ with col2:
377
+ st.markdown(f"### Alerts for {customer_code}")
378
+
379
+ # Identify manufacturers that didn't meet predicted sales
380
+ underperforming_manufacturers = results[results['ventas_reales'] < results['ventas_predichas']]
381
+
382
+ if not underperforming_manufacturers.empty:
383
+ st.warning("Some manufacturers have not met predicted sales:")
384
+ for index, row in underperforming_manufacturers.iterrows():
385
+ manufacturer_name = get_supplier_name(row['marca_id_encoded'])
386
+ predicted = row['ventas_predichas']
387
+ actual = row['ventas_reales']
388
+ delta = predicted - actual
389
+ st.write(f"- {manufacturer_name}: Predicted = {predicted:.2f}€, Actual = {actual:.2f}€, Missed = {delta:.2f}€")
390
  else:
391
+ st.success("All manufacturers have met or exceeded predicted sales.")
392
 
393
+ # Additional chart: compare predicted and actual sales for the top manufacturers
394
+ st.markdown("### Predicted vs Actual Sales for Top Manufacturers")
395
+ top_manufacturers = results.groupby('marca_id_encoded').agg({'ventas_reales': 'sum', 'ventas_predichas': 'sum'}).sort_values(by='ventas_reales', ascending=False).head(10)
396
 
397
+ fig_comparison = go.Figure()
398
+ fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_reales'], name="Actual Sales", marker_color='blue'))
399
+ fig_comparison.add_trace(go.Bar(x=top_manufacturers.index, y=top_manufacturers['ventas_predichas'], name="Predicted Sales", marker_color='orange'))
400
 
401
+ fig_comparison.update_layout(
402
+ title="Actual vs Predicted Sales by Top Manufacturers",
403
+ xaxis_title="Manufacturer",
404
+ yaxis_title="Sales (€)",
405
+ barmode='group',
406
+ height=400,
407
+ hovermode="x unified"
408
+ )
409
 
410
+ st.plotly_chart(fig_comparison, use_container_width=True)
 
 
 
411
 
412
+ # Customer sales 2021-2024 (only if the historical sales columns exist)
413
  sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
414
  if all(col in ventas_clientes.columns for col in sales_columns):
415
  customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
 
424
  actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
425
  predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
426
 
427
+ # Annualize 2024 actual sales from the months of data available so far
428
  months_available = 9 # Data available until September
429
  actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
430
 
431
  # Add 2024 actual and predicted sales
432
+ sales_values = list(customer_sales) + [actual_sales_2024_annual]
433
+ predicted_values = list(customer_sales) + [predicted_sales_2024]
434
 
435
  # Add 2024 to the years list
436
  years.append('2024')
 
484
  st.warning("Sales data for 2021-2023 not available in the dataset.")
485
 
486
 
487
+
488
+ # elif page == "Customer Analysis":
489
+ # st.markdown("""
490
+ # <h2 style='text-align: center; font-size: 2.5rem;'>Customer Analysis</h2>
491
+ # <p style='text-align: center; font-size: 1.2rem; color: gray;'>
492
+ # Enter the customer code to explore detailed customer insights,
493
+ # including past sales, predictions for the current year, and manufacturer-specific information.
494
+ # </p>
495
+ # """, unsafe_allow_html=True)
496
+
497
+ # # Combine text input and dropdown into a single searchable selectbox
498
+ # customer_code = st.selectbox(
499
+ # "Search and Select Customer Code",
500
+ # df['CLIENTE'].unique(), # All customer codes
501
+ # format_func=lambda x: str(x), # Ensures the values are displayed as strings
502
+ # help="Start typing to search for a specific customer code"
503
+ # )
504
+
505
+ # if st.button("Calcular"):
506
+ # if customer_code:
507
+ # with st.spinner("We are identifying the customer's cluster..."):
508
+ # # Find Customer's Cluster
509
+ # customer_match = customer_clusters[customer_clusters['cliente_id'] == customer_code]
510
+ # time.sleep(1)
511
+
512
+
513
+ # if not customer_match.empty:
514
+ # cluster = customer_match['cluster_id'].values[0]
515
+
516
+ # with st.spinner(f"Selecting predictive model..."):
517
+ # # Load the Corresponding Model
518
+ # model_path = f'models/modelo_cluster_{cluster}.txt'
519
+ # gbm = lgb.Booster(model_file=model_path)
520
+
521
+ # with st.spinner("Getting the data ready..."):
522
+ # # Load predict data for that cluster
523
+ # predict_data = pd.read_csv(f'predicts/predict_cluster_{cluster}.csv')
524
+
525
+ # # Convert cliente_id to string
526
+ # predict_data['cliente_id'] = predict_data['cliente_id'].astype(str)
527
+
528
+ # with st.spinner("Filtering data..."):
529
+
530
+ # # Filter for the specific customer
531
+ # customer_code_str = str(customer_code)
532
+ # customer_data = predict_data[predict_data['cliente_id'] == customer_code_str]
533
+
534
+ # with st.spinner("Generating sales predictions..."):
535
+
536
+ # if not customer_data.empty:
537
+ # # Define features consistently with the training process
538
+ # lag_features = [f'precio_total_lag_{lag}' for lag in range(1, 25)]
539
+ # features = lag_features + ['mes', 'marca_id_encoded', 'año', 'cluster_id']
540
+
541
+ # # Prepare data for prediction
542
+ # X_predict = customer_data[features]
543
+
544
+ # # Convert categorical features to 'category' dtype
545
+ # categorical_features = ['mes', 'marca_id_encoded', 'cluster_id']
546
+ # for feature in categorical_features:
547
+ # X_predict[feature] = X_predict[feature].astype('category')
548
+
549
+ # # Make Prediction for the selected customer
550
+ # y_pred = gbm.predict(X_predict, num_iteration=gbm.best_iteration)
551
+
552
+ # # Reassemble the results
553
+ # results = customer_data[['cliente_id', 'marca_id_encoded', 'fecha_mes']].copy()
554
+ # results['ventas_predichas'] = y_pred
555
+
556
+ # # Load actual data
557
+ # actual_sales = df_agg_2024[df_agg_2024['cliente_id'] == customer_code_str]
558
+
559
+ # if not actual_sales.empty:
560
+ # results = results.merge(actual_sales[['cliente_id', 'marca_id_encoded', 'fecha_mes', 'precio_total']],
561
+ # on=['cliente_id', 'marca_id_encoded', 'fecha_mes'],
562
+ # how='left')
563
+ # results.rename(columns={'precio_total': 'ventas_reales'}, inplace=True)
564
+ # results['ventas_reales'].fillna(0, inplace=True)
565
+ # # st.write("### Final Results DataFrame:")
566
+ # # st.write(results.head())
567
+ # # st.write(f"Shape: {results.shape}")
568
+
569
+ # # Calculate metrics only for non-null actual sales
570
+ # valid_results = results.dropna(subset=['ventas_reales'])
571
+ # non_zero_actuals = valid_results[valid_results['ventas_reales'] != 0]
572
+ # if not valid_results.empty:
573
+ # mae = mean_absolute_error(valid_results['ventas_reales'], valid_results['ventas_predichas'])
574
+ # mape = np.mean(np.abs((non_zero_actuals['ventas_reales'] - non_zero_actuals['ventas_predichas']) / non_zero_actuals['ventas_reales'])) * 100
575
+ # rmse = np.sqrt(mean_squared_error(valid_results['ventas_reales'], valid_results['ventas_predichas']))
576
+
577
+ # # st.write(f"Actual total sales for Customer {customer_code}: {valid_results['ventas_reales'].sum():.2f}")
578
+ # # st.write(f"MAE: {mae:.2f}€")
579
+ # # st.write(f"MAPE: {mape:.2f}%")
580
+ # # st.write(f"RMSE: {rmse:.2f}")
581
+
582
+ # # # Analysis of results
583
+ # # threshold_good = 100 # You may want to adjust this threshold
584
+ # # if mae < threshold_good:
585
+ # # st.success(f"Customer {customer_code} is performing well based on the predictions.")
586
+ # # else:
587
+ # # st.warning(f"Customer {customer_code} is not performing well based on the predictions.")
588
+ # # else:
589
+ # # st.warning(f"No actual sales data found for customer {customer_code} in df_agg_2024.")
590
+
591
+ # # st.write("### Debug Information for Radar Chart:")
592
+ # # st.write(f"Shape of customer_data: {customer_data.shape}")
593
+ # # st.write(f"Shape of euros_proveedor: {euros_proveedor.shape}")
594
+
595
+ # # Get percentage of units sold for each manufacturer
596
+ # customer_df = df[df["CLIENTE"] == str(customer_code)] # Get the customer data
597
+ # all_manufacturers = customer_df.iloc[:, 1:].T # Exclude CLIENTE column (manufacturers are in columns)
598
+ # all_manufacturers.index = all_manufacturers.index.astype(str)
599
+
600
+ # # Get total sales for each manufacturer from euros_proveedor
601
+ # customer_euros = euros_proveedor[euros_proveedor["CLIENTE"] == str(customer_code)]
602
+ # sales_data = customer_euros.iloc[:, 1:].T # Exclude CLIENTE column
603
+ # sales_data.index = sales_data.index.astype(str)
604
+
605
+ # # Remove the 'CLIENTE' row from sales_data to avoid issues with mixed types
606
+ # sales_data_filtered = sales_data.drop(index='CLIENTE', errors='ignore')
607
+
608
+ # # Ensure all values are numeric
609
+ # sales_data_filtered = sales_data_filtered.apply(pd.to_numeric, errors='coerce')
610
+ # all_manufacturers = all_manufacturers.apply(pd.to_numeric, errors='coerce')
611
+
612
+ # # Sort manufacturers by percentage of units and get top 10
613
+ # top_units = all_manufacturers.sort_values(by=all_manufacturers.columns[0], ascending=False).head(10)
614
+
615
+ # # Sort manufacturers by total sales and get top 10
616
+ # top_sales = sales_data_filtered.sort_values(by=sales_data_filtered.columns[0], ascending=False).head(10)
617
+
618
+ # # Combine top manufacturers from both lists and get up to 20 unique manufacturers
619
+ # combined_top = pd.concat([top_units, top_sales]).index.unique()[:20]
620
+
621
+ # # Filter out manufacturers that are not present in both datasets
622
+ # combined_top = [m for m in combined_top if m in all_manufacturers.index and m in sales_data_filtered.index]
623
+
624
+ # # st.write(f"Number of combined top manufacturers: {len(combined_top)}")
625
+
626
+ # if combined_top:
627
+ # # Create a DataFrame with combined data for these top manufacturers
628
+ # combined_data = pd.DataFrame({
629
+ # 'units': all_manufacturers.loc[combined_top, all_manufacturers.columns[0]],
630
+ # 'sales': sales_data_filtered.loc[combined_top, sales_data_filtered.columns[0]]
631
+ # }).fillna(0)
632
+
633
+ # # Sort by units, then by sales
634
+ # combined_data_sorted = combined_data.sort_values(by=['units', 'sales'], ascending=False)
635
+
636
+ # # Filter out manufacturers with 0 units
637
+ # non_zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] > 0]
638
+
639
+ # # If we have less than 3 non-zero manufacturers, add some zero-value ones
640
+ # if len(non_zero_manufacturers) < 3:
641
+ # zero_manufacturers = combined_data_sorted[combined_data_sorted['units'] == 0].head(3 - len(non_zero_manufacturers))
642
+ # manufacturers_to_show = pd.concat([non_zero_manufacturers, zero_manufacturers])
643
+ # else:
644
+ # manufacturers_to_show = non_zero_manufacturers
645
+
646
+ # values = manufacturers_to_show['units'].tolist()
647
+ # amounts = manufacturers_to_show['sales'].tolist()
648
+ # manufacturers = [get_supplier_name(m) for m in manufacturers_to_show.index]
649
+
650
+ # # st.write(f"### Results for top {len(manufacturers)} manufacturers:")
651
+ # # for manufacturer, value, amount in zip(manufacturers, values, amounts):
652
+ # # (f"{manufacturer} = {value:.2f}% of units, €{amount:.2f} total sales")
653
+
654
+ # if manufacturers: # Only create the chart if we have data
655
+ # fig = radar_chart(manufacturers, values, amounts, f'Radar Chart for Top {len(manufacturers)} Manufacturers of Customer {customer_code}')
656
+ # st.pyplot(fig)
657
+ # else:
658
+ # st.warning("No data available to create the radar chart.")
659
+
660
+ # else:
661
+ # st.warning("No combined top manufacturers found.")
662
+
663
+ # # Ensure codigo_cliente in ventas_clientes is a string
664
+ # ventas_clientes['codigo_cliente'] = ventas_clientes['codigo_cliente'].astype(str).str.strip()
665
+
666
+ # # Ensure customer_code is a string and strip any spaces
667
+ # customer_code = str(customer_code).strip()
668
+
669
+ # # if customer_code in ventas_clientes['codigo_cliente'].unique():
670
+ # # (f"Customer {customer_code} found in ventas_clientes")
671
+ # # else:
672
+ # # (f"Customer {customer_code} not found in ventas_clientes")
673
+
674
+ # # Customer sales 2021-2024 (if data exists)
675
+ # sales_columns = ['VENTA_2021', 'VENTA_2022', 'VENTA_2023']
676
+ # if all(col in ventas_clientes.columns for col in sales_columns):
677
+ # customer_sales_data = ventas_clientes[ventas_clientes['codigo_cliente'] == customer_code]
678
+
679
+ # if not customer_sales_data.empty:
680
+ # customer_sales = customer_sales_data[sales_columns].values[0]
681
+ # years = ['2021', '2022', '2023']
682
+
683
+ # # Add the 2024 actual and predicted data
684
+ # if 'ventas_predichas' in results.columns and 'ventas_reales' in results.columns:
685
+ # # Get the actual and predicted sales for 2024
686
+ # actual_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_reales'].sum()
687
+ # predicted_sales_2024 = results[results['fecha_mes'].str.startswith('2024')]['ventas_predichas'].sum()
688
+
689
+ # # Estimate full-year predicted sales (assuming predictions available until September)
690
+ # months_available = 9 # Data available until September
691
+ # actual_sales_2024_annual = (actual_sales_2024 / months_available) * 12
692
+
693
+ # # Add 2024 actual and predicted sales
694
+ # sales_values = list(customer_sales) + [actual_sales_2024_annual] # Actual sales
695
+ # predicted_values = list(customer_sales) + [predicted_sales_2024] # Predicted sales
696
+
697
+ # # Add 2024 to the years list
698
+ # years.append('2024')
699
+
700
+ # fig_sales_bar = go.Figure()
701
+ # # Add trace for historical sales (2021-2023)
702
+ # fig_sales_bar.add_trace(go.Bar(
703
+ # x=years[:3], # 2021, 2022, 2023
704
+ # y=sales_values[:3],
705
+ # name="Historical Sales",
706
+ # marker_color='blue'
707
+ # ))
708
+
709
+ # # Add trace for 2024 actual sales
710
+ # fig_sales_bar.add_trace(go.Bar(
711
+ # x=[years[3]], # 2024
712
+ # y=[sales_values[3]],
713
+ # name="2024 Actual Sales (Annualized)",
714
+ # marker_color='green'
715
+ # ))
716
+
717
+ # # Add trace for 2024 predicted sales
718
+ # fig_sales_bar.add_trace(go.Bar(
719
+ # x=[years[3]], # 2024
720
+ # y=[predicted_values[3]],
721
+ # name="2024 Predicted Sales",
722
+ # marker_color='orange'
723
+ # ))
724
+
725
+ # # Update layout
726
+ # fig_sales_bar.update_layout(
727
+ # title=f"Sales Over the Years for Customer {customer_code}",
728
+ # xaxis_title="Year",
729
+ # yaxis_title="Sales (€)",
730
+ # barmode='group',
731
+ # height=600,
732
+ # legend_title_text="Sales Type",
733
+ # hovermode="x unified"
734
+ # )
735
+
736
+ # # Show the interactive bar chart in Streamlit
737
+ # st.plotly_chart(fig_sales_bar, use_container_width=True)
738
+
739
+ # else:
740
+ # st.warning(f"No predicted or actual data found for customer {customer_code} for 2024.")
741
+
742
+ # else:
743
+ # st.warning(f"No historical sales data found for customer {customer_code}")
744
+
745
+ # else:
746
+ # st.warning("Sales data for 2021-2023 not available in the dataset.")
747
+
748
+
749
  # Customer Recommendations Page
750
  elif page == "Articles Recommendations":
751
  st.title("Articles Recommendations")