Spaces:
Sleeping
Sleeping
# -*- coding: utf-8 -*- | |
""" | |
Created on Wed Sep 27 17:45:59 2023 | |
env: dardos2 | |
@author: forti | |
-Grafica training y validation sets para comparar eficiencia de pronostico | |
- Grafica con plotly y matplotlib. | |
-Genera df_reindexed con periodo semanal | |
-Calcula sigma y safety stock | |
- Calcula MOVING AVERAGE ( descomentar) | |
""" | |
import numpy as np | |
import pandas as pd | |
# import matplotlib.pyplot as plt | |
import plotly.io as pio | |
import plotly.express as px | |
from darts import TimeSeries | |
from darts.models import RandomForest | |
# from darts.models import NaiveDrift | |
# from darts.models import MovingAverageFilter | |
import streamlit as st | |
# import locale | |
from darts.metrics import mae, rmse | |
from PIL import Image | |
########################################################################### | |
def convert_df(df_new): | |
# IMPORTANT: Cache the conversion to prevent computation on every rerun | |
return df_new.to_csv().encode('utf-8') | |
########################################################################### | |
pio.renderers.default = "browser" | |
st.title(" 🏪 Super Playitas") | |
st.header("🤖 Pronósticos con Inteligencia Artificial 🤖") | |
df = None | |
uploaded_file = st.file_uploader("Seleccione su producto", type='xlsx') | |
if uploaded_file is not None: | |
df = pd.read_excel(uploaded_file) | |
# st.write(data) | |
if df is not None: | |
################ Limpieza de datos ########################### | |
# df = pd.read_excel('marlboro-20.xlsx') | |
# df = pd.read_excel('cocacola-600.xlsx') | |
df.drop([0, 1, 2, 3, 4, 5], inplace=True) | |
df.columns = df.iloc[0] | |
df.drop(6, inplace=True) | |
########################################################### | |
# Set to Spanish or English locale | |
#locale.setlocale(locale.LC_TIME, "en_US.UTF-8") | |
# locale.setlocale(locale.LC_TIME, "es_US.UTF-8") | |
# Create a dictionary mapping Spanish month abbreviations to English | |
month_dict = { | |
'Ene': 'Jan', | |
'Feb': 'Feb', | |
'Mar': 'Mar', | |
'Abr': 'Apr', | |
'May': 'May', | |
'Jun': 'Jun', | |
'Jul': 'Jul', | |
'Ago': 'Aug', | |
'Sep': 'Sep', | |
'Oct': 'Oct', | |
'Nov': 'Nov', | |
'Dic': 'Dec' | |
} | |
# Assume df is your DataFrame and 'date' is the column with dates | |
df['Fecha'] = df['Fecha'].replace(month_dict, regex=True) | |
# convert the date column into datetime | |
df['Fecha'] = pd.to_datetime(df['Fecha'], format='%d/%b/%Y %I:%M %p', errors='coerce') | |
# rename columns | |
df.columns.values[2] = 'Cantidad-1' | |
df.columns.values[5] = 'Cantidad-2' | |
df.columns.values[8] = 'Cantidad-3' | |
df['Cantidad-2'] = pd.to_numeric(df['Cantidad-2'], errors='coerce') | |
# lee el ultimo valor de la columna de cantidad-3 | |
df['Cantidad-3'] = pd.to_numeric(df['Cantidad-3'], errors='coerce') | |
inventario_neto = df['Cantidad-3'].iloc[-1] | |
# drop NaN rows from column "Cantidad-2" | |
df.dropna(subset=['Cantidad-2'], inplace=True) | |
# drop negative values from "Cantidad-2" | |
df = df[df['Cantidad-2'] >= 0] | |
# Create a new column 'Fecha_sin_hora' with just the date | |
df['Fecha_sin_hora'] = df['Fecha'].dt.date | |
# Group by 'Fecha_sin_hora' and sum 'Cantidad-2' | |
df_sum = df.groupby('Fecha_sin_hora')['Cantidad-2'].sum().reset_index() | |
############################################################### | |
# # Create a new column 'Mes' with just the month and year | |
# df['Mes'] = df['Fecha_sin_hora'].dt.to_period('M') | |
# # Group by 'Mes' and sum 'Cantidad-2' | |
# df_sum = df.groupby('Mes')['Cantidad-2'].sum().reset_index() | |
############################################################### | |
# df_sum.drop(737, inplace=True) | |
# df_sum = df_sum.iloc[699:800] | |
################################################################ | |
######## Create a DataFrame with a new date range ############################ | |
# Create the date range | |
# new_date_range = pd.date_range(start='2023-03-12', end='2023-08-11') | |
new_date_range = pd.date_range(start=df_sum.iloc[0,0], end=df_sum.iloc[-1,0]) | |
# Convert the date range to a DataFrame | |
#df_3 = pd.DataFrame({'Fecha': new_date_range}) | |
################################################################## | |
df_sum.set_index('Fecha_sin_hora', inplace=True) | |
# Re-index with a new date-range for each group | |
#df_reindexed = df_sum.apply(lambda x: x.set_index('Fecha_sin_hora').reindex(new_date_range)) | |
df_reindexed = df_sum.apply(lambda x: x.reindex(new_date_range)) | |
# Reset the index | |
#df_reindexed = df_reindexed.reset_index(level=0, drop=True).reset_index() | |
# Fill missing sales values with 0 | |
df_reindexed['Cantidad-2'] = df_reindexed['Cantidad-2'].fillna(0) | |
################################################################# | |
# Create a new column 'Semana' | |
df_reindexed.reset_index(inplace=True) | |
df_reindexed = df_reindexed.rename(columns={'index': 'Fecha'}) | |
df_reindexed['Semana'] = df_reindexed['Fecha'].dt.to_period('W') | |
# Group by 'Semana' and sum 'Cantidad-2' | |
df_reindexed = df_reindexed.groupby('Semana')['Cantidad-2'].sum().reset_index() | |
# rename columns | |
df_reindexed.columns.values[0] = 'Fecha' | |
# ax = df_reindexed.plot(label='Historial') | |
# plt.show() | |
#hacer fecha el indice | |
# df_reindexed.set_index('Fecha', inplace=True) | |
df_reindexed['Fecha'] = df_reindexed['Fecha'].astype(str) | |
df_reindexed['Fecha'] = df_reindexed['Fecha'].str.split('/').str[0] | |
# usar para pronostico de las ultimas dos semanas. fecha automatizada | |
# dt = pd.to_datetime(df_sum.index[1]) | |
# dt = dt + pd.Timedelta(days=1) | |
################################################################# | |
# Convert datetime to string to be able to use DARTS | |
#df_sum['Fecha_sin_hora'] = df_sum['Fecha_sin_hora'].astype(str) | |
########## Reset the index. Use only for daily df_reindexed ################## | |
# df_reindexed['Fecha'] = df_reindexed.index | |
# df_reindexed.reset_index(drop=True, inplace=True) | |
# df_reindexed = df_reindexed[['Fecha', 'Cantidad-2']] | |
# Create a TimeSeries object, specifying the time and value columns | |
series = TimeSeries.from_dataframe(df_reindexed, "Fecha", 'Cantidad-2') | |
# series = TimeSeries.from_dataframe(df_reindexed, freq="W") | |
# Assuming df_reindexed is your DataFrame and it's indexed by date | |
# series = TimeSeries.from_times_and_values(times=df_reindexed, values=df_reindexed['Cantidad-2']) | |
# Set aside the last 16 days as a validation series | |
train, val = series[:-15], series[-15:] | |
#################### Train Forecasting Models ###################### | |
# Train baseline model | |
# model_1 = NaiveDrift() | |
# model_1.fit(series) | |
# Train ML model | |
model_2 = RandomForest(lags=20) #25 #55 | |
model_2a = RandomForest(lags=20) | |
# model_2.fit(train) | |
model_2.fit(series) | |
model_2a.fit(train) | |
# MOVING AVERAGE | |
# model_3 = MovingAverageFilter(window=15) | |
# filtered_series = model_3.filter(series) | |
#################################################################### | |
################## Predict with Forecasting Models ################# | |
# Predict using baseline model | |
# prediction_1 = model_1.predict(15) | |
# Predict using RandomForest | |
# prediction_2 = model_2.predict(len(val)) | |
prediction_2 = model_2.predict(15) | |
prediction_2a = model_2a.predict(15) | |
# df_4 = pd.DataFrame(prediction_2.pd_dataframe()) | |
# df_4 = df_4.reset_index(drop=True) | |
################################################################### | |
######## plot forecast from validation set ######################## | |
# converts datetime objects to pandas dataframes | |
series_df = series.pd_dataframe() | |
prediction_2_df = prediction_2.pd_dataframe() | |
prediction_2a_df = prediction_2a.pd_dataframe() | |
val_df = val.pd_dataframe() | |
# series.plot(label="Historial de Ventas") | |
# prediction_2a.plot(label="Pronóstico", low_quantile=0.05, high_quantile=0.95) | |
# filtered_series.plot(label="Historial de Ventas") | |
# series.plot(label="Historial de Ventas") | |
# plt.xlabel('Fecha') | |
# plt.ylabel('Ventas') | |
# plt.legend() | |
# plt.subplots_adjust(bottom=0.2) | |
# plt.savefig("producto_forecast_2.png", dpi=200) | |
# n=115 | |
# series_df = series_df.tail(n) | |
# ax = series_df.plot(label='Historial') | |
# prediction_2_df.plot(ax=ax, label='Pronostico') | |
# plt.legend() | |
# plt.show() | |
############################################################################### | |
#########################Cacula std del forecast ############################ | |
error = prediction_2a_df["Cantidad-2"] - val_df["Cantidad-2"] | |
mean = error.mean() | |
diff = error - mean | |
diff_squared = diff.apply(lambda x:x**2).sum() | |
std = np.sqrt(diff_squared/(len(diff)-1)) | |
safety_stock = 1.96*std | |
############################################################################### | |
########################## metricas ################################ | |
mae_1 = mae(val, prediction_2a) | |
# mape_1 = mape(val, prediction_2a) | |
rmse_1 = rmse(val, prediction_2a) | |
##################### Pronostico final ################################## | |
promedio = prediction_2_df.mean() | |
pronostico_promedio = promedio + safety_stock | |
# semana = prediction_2_df.index[0] | |
#redondea y los numeros del df. si la borro siguie funcionando todo | |
prediction_2_df = prediction_2_df.round().astype(int) | |
safety_stock = safety_stock.round() | |
##################Grafica con plotly########################################### | |
# df_borrar = df_sum +1 | |
# df_borrar = df_borrar.iloc[800:844] | |
figure_01=px.line(series_df,y="Cantidad-2",) | |
figure_01.layout.update(title_text=None,xaxis_rangeslider_visible=True) | |
figure_02=px.line(prediction_2_df,y="Cantidad-2") | |
figure_02.update_traces(line=dict(color='red')) | |
figure_03=px.scatter(series_df,y="Cantidad-2",) | |
figure_03.update_traces(marker=dict(color='DodgerBlue')) | |
figure_04=px.scatter(prediction_2_df,y="Cantidad-2",) | |
figure_04.update_traces(marker=dict(color='red')) | |
# st.plotly_chart(figure_01) | |
# Add figure_02 data to figure_01 | |
for trace in figure_02.data: | |
figure_01.add_trace(trace) | |
# Add figure_03 data to figure_01 | |
for trace in figure_03.data: | |
figure_01.add_trace(trace) | |
for trace in figure_04.data: | |
figure_01.add_trace(trace) | |
st.subheader(":blue[Historial] y :red[Pronóstico] de Ventas Semanal") | |
st.plotly_chart(figure_01) | |
############################################################################### | |
if mae_1 < 7: | |
st.subheader("Pronóstico de las proximas 15 semanas:") | |
prediction_2_total = prediction_2_df + safety_stock | |
prediction_2_total.index = pd.to_datetime(prediction_2_total.index).date | |
prediction_2_total.rename(columns={'Cantidad-2': 'Nivel de Inventario'}, inplace=True) | |
prediction_2_total['Pronostico'] = prediction_2_df['Cantidad-2'] | |
# Create an array with the number repeated the desired number of times | |
stock = np.full((len(prediction_2_total), 1), safety_stock) | |
# Convert the array to a DataFrame | |
stock_df = pd.DataFrame(stock, columns=['safety']).astype(int) | |
prediction_2_total['Stock de Seguridad'] = stock_df['safety'].values | |
prediction_2_total = prediction_2_total[['Pronostico', 'Stock de Seguridad', 'Nivel de Inventario']] | |
prediction_2_total['Nivel de Inventario'] = prediction_2_total['Nivel de Inventario'].round().astype(int) | |
st.table(prediction_2_total) | |
else: | |
st.subheader("Pronóstico de las proximas 15 semanas:") | |
pronostico_promedio = pronostico_promedio.iloc[0] | |
# Create an array with the number repeated the desired number of times | |
pronostico_promedio = np.full((len(prediction_2_df), 1), pronostico_promedio) | |
# Convert the array to a DataFrame | |
pronostico_promedio_df = pd.DataFrame(pronostico_promedio, columns=['Nivel de Inventario']) | |
prediction_2_df["Nivel de Inventario"] = pronostico_promedio_df['Nivel de Inventario'].values | |
# Create an array with the number repeated the desired number of times | |
stock = np.full((len(prediction_2_df), 1), safety_stock) | |
# Convert the array to a DataFrame | |
stock_df = pd.DataFrame(stock, columns=['safety']).astype(int) | |
prediction_2_df['Stock de Seguridad'] = stock_df['safety'].values | |
prediction_2_total = prediction_2_df | |
prediction_2_total.index = pd.to_datetime(prediction_2_total.index).date | |
prediction_2_total.rename(columns={'Cantidad-2': 'Pronostico'}, inplace=True) | |
prediction_2_total = prediction_2_total[['Pronostico', 'Stock de Seguridad', 'Nivel de Inventario']] | |
prediction_2_total['Nivel de Inventario'] = prediction_2_total['Nivel de Inventario'].round().astype(int) | |
st.table(prediction_2_total) | |
################################################################################# | |
csv = convert_df(prediction_2_total) | |
st.download_button( | |
label="Descargar datos ⤵", | |
data=csv, | |
file_name='pronostico-producto.csv', | |
mime='text/csv', | |
) | |
cantidad = round(prediction_2_total.iloc[0,0] - inventario_neto) | |
if cantidad<0: | |
cantidad =0 | |
# st.text("Cantidad =" + str(cantidad)) | |
st.subheader("Cantidad de producto a ordenar (por semana): ") | |
# st.text("Cantidad = Pronostico - Inventario Neto") | |
st.metric(label=":blue[Cantidad = Nivel de Inventario - Inventario Neto]", value=cantidad) | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
image = Image.open("magna.png") | |
st.image(image, caption='Inteligencia Artificial para tu negocio') | |
st.markdown("Contáctanos: www.magna-machina.com") | |
else: | |
st.subheader("No ha seleccionado su producto.") | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
st.write('\n') | |
image = Image.open("magna.png") | |
st.image(image, caption='Inteligencia Artificial para tu negocio') | |
st.markdown("Contáctanos: www.magna-machina.com") | |