IvanStudent committed on
Commit
10caf6a
1 Parent(s): 4e5cacd

Guardar mis cambios locales

Browse files
Files changed (2) hide show
  1. app.py +92 -76
  2. requirements.txt +0 -0
app.py CHANGED
@@ -1,88 +1,104 @@
1
- import pandas as pd
2
- import matplotlib.pyplot as plt
3
- import joblib
4
  import gradio as gr
5
- from dateutil.relativedelta import relativedelta
6
- import calendar
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- def load_model():
9
- try:
10
- model = joblib.load('arima_sales_model.pkl')
11
- return model, None
12
- except Exception as e:
13
- return None, f"Failed to load model: {str(e)}"
14
 
15
- def parse_date(date_str):
16
- """Parse the custom date format 'Month-Year'."""
17
- try:
18
- date = pd.to_datetime(date_str, format="%B-%Y")
19
- _, last_day = calendar.monthrange(date.year, date.month)
20
- start_date = date.replace(day=1)
21
- end_date = date.replace(day=last_day)
22
- return start_date, end_date, None
23
- except ValueError:
24
- return None, None, "Date format should be 'Month-Year', e.g., 'January-2024'."
25
 
26
- def forecast_sales(uploaded_file, start_date_str, end_date_str):
27
- if uploaded_file is None:
28
- return "No file uploaded.", None, "Please upload a file."
 
29
 
30
- try:
31
- df = pd.read_csv(uploaded_file)
32
- if 'Date' not in df.columns or 'Sale' not in df.columns:
33
- return None, "The uploaded file must contain 'Date' and 'Sale' columns.", "File does not have required columns."
34
- except Exception as e:
35
- return None, f"Failed to read the uploaded CSV file: {str(e)}", "Error reading file."
36
 
37
- start_date, _, error = parse_date(start_date_str)
38
- _, end_date, error_end = parse_date(end_date_str)
39
- if error or error_end:
40
- return None, error or error_end, "Invalid date format."
 
 
 
41
 
42
- df['Date'] = pd.to_datetime(df['Date'])
43
- df = df.rename(columns={'Date': 'ds', 'Sale': 'y'})
 
 
44
 
45
- df_filtered = df[(df['ds'] >= start_date) & (df['ds'] <= end_date)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
- arima_model, error = load_model()
48
- if arima_model is None:
49
- return None, error, "Failed to load ARIMA model."
 
50
 
51
- try:
52
- forecast = arima_model.get_forecast(steps=60)
53
- forecast_index = pd.date_range(start=end_date, periods=61, freq='D')[1:]
54
- forecast_df = pd.DataFrame({'Date': forecast_index, 'Sales Forecast': forecast.predicted_mean})
55
-
56
- fig, ax = plt.subplots(figsize=(10, 6))
57
- ax.plot(df_filtered['ds'], df_filtered['y'], label='Actual Sales', color='blue')
58
- ax.plot(forecast_df['Date'], forecast_df['Sales Forecast'], label='Sales Forecast', color='red', linestyle='--')
59
- ax.set_xlabel('Date')
60
- ax.set_ylabel('Sales')
61
- ax.set_title('Sales Forecasting with ARIMA')
62
- ax.legend()
63
- return fig, "File loaded and processed successfully."
64
- except Exception as e:
65
- return None, f"Failed to generate plot: {str(e)}", "Plotting failed."
66
 
67
- def setup_interface():
68
- with gr.Blocks() as demo:
69
- gr.Markdown("## MLCast v1.1 - Intelligent Sales Forecasting System")
70
- with gr.Row():
71
- with gr.Column(scale=1):
72
- file_input = gr.File(label="Upload your store data")
73
- start_date_input = gr.Textbox(label="Start Date", placeholder="January-2024")
74
- end_date_input = gr.Textbox(label="End Date", placeholder="December-2024")
75
- forecast_button = gr.Button("Forecast Sales")
76
- with gr.Column(scale=2):
77
- output_plot = gr.Plot()
78
- output_message = gr.Textbox(label="Notifications", visible=True, lines=2)
79
- forecast_button.click(
80
- forecast_sales,
81
- inputs=[file_input, start_date_input, end_date_input],
82
- outputs=[output_plot, output_message]
83
- )
84
- return demo
85
 
86
- if __name__ == "__main__":
87
- interface = setup_interface()
88
- interface.launch()
 
 
 
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ from datetime import datetime
5
+ import pmdarima as pm
6
+ from pmdarima import auto_arima
7
+ import plotly.graph_objects as go
8
+ from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
9
+
10
+ # Load the TAPAS Model
11
+ def load_tapas_model():
12
+ model_name = "google/tapas-large-finetuned-wtq"
13
+ tokenizer = TapasTokenizer.from_pretrained(model_name)
14
+ model = TapasForQuestionAnswering.from_pretrained(model_name)
15
+ pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
16
+ return pipe
17
+
18
+ pipe = load_tapas_model()
19
 
20
+ # Helper Functions
21
+ def drop(dataframe):
22
+ # Drop unnecessary columns and keep only 'Date' and 'Sales'
23
+ columns_to_keep = [col for col in dataframe.columns if "date" in col.lower() or "sales" in col.lower()]
24
+ dataframe = dataframe[columns_to_keep].dropna()
25
+ return dataframe
26
 
27
+ def date_format(dataframe):
28
+ # Convert the 'Date' column to a proper datetime format
29
+ dataframe['Date'] = pd.to_datetime(dataframe['Date'].str.strip(), format="%m/%d/%Y")
30
+ return dataframe
 
 
 
 
 
 
31
 
32
+ def group_to_three(dataframe):
33
+ # Group the data into three-day intervals and calculate the mean sales
34
+ dataframe = dataframe.groupby(pd.Grouper(key='Date', freq='3D')).Sales.mean().dropna().round(2)
35
+ return dataframe
36
 
37
+ def series_to_df_exogenous(series):
38
+ # Convert the series to a DataFrame and create exogenous variables
39
+ dataframe = series.to_frame().reset_index().set_index('Date')
40
+ dataframe['Sales First Difference'] = dataframe['Sales'].diff().dropna()
41
+ dataframe['Seasonal First Difference'] = dataframe['Sales'].diff(12).dropna()
42
+ return dataframe.dropna()
43
 
44
+ def train_test(dataframe, n):
45
+ # Split the data into training and testing sets
46
+ training_y = dataframe['Sales'][:-n]
47
+ test_y = dataframe['Sales'][-n:]
48
+ training_X = dataframe.iloc[:-n, 1:]
49
+ test_X = dataframe.iloc[-n:, 1:]
50
+ return training_y, test_y, training_X, test_X
51
 
52
+ def test_fitting(train_X, train_y):
53
+ # Fit a SARIMAX model using auto_arima
54
+ model = auto_arima(y=train_y, X=train_X, seasonal=True, m=12, stepwise=True, suppress_warnings=True)
55
+ return model
56
 
57
+ def forecast_sales(df, period):
58
+ # Prepare data and make predictions
59
+ df = drop(df)
60
+ df = date_format(df)
61
+ series = group_to_three(df)
62
+ exog_df = series_to_df_exogenous(series)
63
+
64
+ n_periods = int(len(exog_df) * 0.2)
65
+ train_y, test_y, train_X, test_X = train_test(exog_df, n_periods)
66
+
67
+ model = test_fitting(train_X, train_y)
68
+ future_fitted, _ = model.predict(n_periods=int(period / 3), X=exog_df.iloc[-int(period / 3):, 1:], return_conf_int=True)
69
+
70
+ future_dates = pd.date_range(start=series.index[-1], periods=int(period / 3), freq='3D')
71
+ forecast_df = pd.DataFrame({'Date': future_dates, 'Forecasted Sales': future_fitted})
72
+
73
+ return forecast_df
74
 
75
+ def answer_question(forecast_df, question):
76
+ # Use TAPAS model to answer questions
77
+ answer = pipe(table=forecast_df, query=question)
78
+ return answer['answer']
79
 
80
+ # Gradio Interface
81
+ def main(uploaded_file, period, question):
82
+ # Main function to process uploaded file, forecast sales, and answer the question
83
+ df = pd.read_csv(uploaded_file)
84
+ forecast_df = forecast_sales(df, period)
85
+ answer = answer_question(forecast_df, question)
86
+
87
+ return forecast_df, answer
 
 
 
 
 
 
 
88
 
89
+ interface = gr.Interface(
90
+ fn=main,
91
+ inputs=[
92
+ gr.File(label="Upload CSV File"),
93
+ gr.Slider(minimum=30, maximum=90, step=1, label="Forecast Days"),
94
+ gr.Textbox(placeholder="Ask a question about the forecasted data", label="Question")
95
+ ],
96
+ outputs=[
97
+ gr.Dataframe(label="Forecasted Sales Data"),
98
+ gr.Textbox(label="Answer")
99
+ ],
100
+ title="Sales Forecasting Dashboard",
101
+ description="Upload your sales data and get a forecast. You can also ask questions about the forecasted data."
102
+ )
 
 
 
 
103
 
104
+ interface.launch()
 
 
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ