Spaces:
Running
Running
Circhastic
committed on
Commit
•
e2cb6ae
1
Parent(s):
39440ed
Fix app and removed modules
Browse files- app.py +215 -10
- modules/__init__.py +0 -1
- modules/__pycache__/__init__.cpython-311.pyc +0 -0
- modules/__pycache__/arima.cpython-311.pyc +0 -0
- modules/__pycache__/preprocessor.cpython-311.pyc +0 -0
- modules/__pycache__/tapas.cpython-311.pyc +0 -0
- modules/arima.py +0 -68
- modules/preprocessor.py +0 -76
- modules/tapas.py +0 -43
app.py
CHANGED
@@ -1,6 +1,195 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
-
from
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
st.title("Sales Forecasting Dashboard")
|
6 |
# Greeting shown under the title; directs the user to the sidebar.
st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")
|
@@ -10,10 +199,10 @@ st.set_page_config(
|
|
10 |
page_icon="📈",
|
11 |
layout="wide",
|
12 |
initial_sidebar_state="expanded",
|
13 |
-
|
14 |
|
15 |
-
|
16 |
-
|
17 |
|
18 |
# Sidebar Menu
|
19 |
with st.sidebar:
|
@@ -28,11 +217,27 @@ with st.sidebar:
|
|
28 |
df = pd.read_csv(uploaded_file, parse_dates=True)
|
29 |
st.write("Your uploaded data:")
|
30 |
st.write(df)
|
|
|
31 |
# Data pre-processing
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
|
|
37 |
with open('sample.csv', 'rb') as f:
|
38 |
-
st.download_button("Download our sample CSV", f, file_name='sample.csv')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
from datetime import datetime
|
4 |
+
|
5 |
+
import numpy as np
|
6 |
+
import pmdarima as pm
|
7 |
+
from pmdarima import auto_arima
|
8 |
+
|
9 |
+
import torch
|
10 |
+
from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
|
11 |
+
|
12 |
+
# Preprocessing
|
13 |
+
def merge(B, C, A):
    """Merge two date-ordered frames into ``A`` and return ``A``.

    ``B`` and ``C`` are expected to be individually ordered by 'Date'; the
    merged (Date, Sales) rows are written into ``A`` positionally, with rows
    from ``B`` placed first on equal dates. 'Date' values are stored in ``A``
    as ``datetime.date`` objects.

    Args:
        B: left frame with 'Date' and 'Sales' columns (not modified).
        C: right frame with 'Date' and 'Sales' columns (not modified).
        A: destination frame; must have exactly ``len(B) + len(C)`` rows.

    Returns:
        ``A``, after its 'Date' and 'Sales' columns were overwritten.

    Raises:
        ValueError: if ``A`` cannot hold exactly the rows of ``B`` and ``C``.
    """
    def _rows(frame):
        # Normalise dates to datetime.date so both halves compare uniformly.
        dates = pd.to_datetime(frame['Date']).dt.date
        return list(zip(dates, frame['Sales']))

    # Snapshot both inputs before touching A: B and C are typically slices of
    # A, so writing into A first could overwrite rows that were not read yet.
    left, right = _rows(B), _rows(C)
    if len(left) + len(right) != len(A):
        raise ValueError("A must have exactly len(B) + len(C) rows")

    merged = []
    i = j = 0
    while i < len(left) and j < len(right):
        if left[i][0] <= right[j][0]:
            merged.append(left[i])
            i += 1
        else:
            merged.append(right[j])
            j += 1
    merged.extend(left[i:])
    merged.extend(right[j:])

    # Whole-column assignment instead of chained ``A[col].iloc[k] = value``,
    # which may write to a temporary object and leave A unchanged.
    A['Date'] = [row_date for row_date, _ in merged]
    A['Sales'] = [row_sales for _, row_sales in merged]
    return A
|
46 |
+
|
47 |
+
def merge_sort(dataframe):
    """Sort ``dataframe`` by 'Date' in place with a recursive merge sort.

    The frame is split in half, each half is sorted on its own copy, and the
    two sorted halves are merged back into ``dataframe`` by ``merge``. The
    same object is returned, so callers may use either the return value or
    the mutated argument.

    Args:
        dataframe: frame with 'Date' and 'Sales' columns.

    Returns:
        ``dataframe`` itself; frames with at most one row are returned
        unchanged.
    """
    if len(dataframe) <= 1:
        return dataframe

    center = len(dataframe) // 2
    # Sort independent copies and use the returned frames: relying on the
    # recursive calls to mutate ``.iloc`` slices of the parent only works
    # when pandas happens to hand back a writable view.
    left = merge_sort(dataframe.iloc[:center].copy())
    right = merge_sort(dataframe.iloc[center:].copy())

    return merge(left, right, dataframe)
|
59 |
+
|
60 |
+
def drop(dataframe):
    """Keep only date/sale columns and discard rows with missing values.

    A column is kept when its name contains "date" or "sale", compared
    case-insensitively. The input frame is not modified; a new frame is
    returned.
    """
    wanted = ["date", "sale"]
    keep = [
        name
        for name in dataframe.columns
        if any(token.lower() in name.lower() for token in wanted)
    ]
    discard = dataframe.columns.difference(keep)
    return dataframe.drop(columns=discard).dropna()
|
69 |
+
|
70 |
+
def date_format(dataframe):
    """Parse the 'Date' column from "%m/%d/%Y" text into ``datetime.date``.

    Surrounding whitespace is stripped from each value before parsing. The
    column is replaced on ``dataframe`` itself and the same frame is returned.

    Args:
        dataframe: frame whose 'Date' column holds strings such as
            "01/31/2023".

    Returns:
        ``dataframe`` with 'Date' holding ``datetime.date`` objects.

    Raises:
        ValueError: if a value does not match "%m/%d/%Y".
    """
    # One whole-column assignment replaces the per-cell chained indexing
    # (``dataframe['Date'][i] = ...``), which may write to a temporary, and
    # removes the dependence on the frame having exactly two columns.
    dataframe['Date'] = [
        datetime.strptime(raw.strip(), "%m/%d/%Y").date()
        for raw in dataframe['Date']
    ]

    return dataframe
|
79 |
+
|
80 |
+
def group_to_three(dataframe):
    """Average 'Sales' over consecutive 3-day windows of 'Date'.

    The 'Date' column of ``dataframe`` is converted to pandas datetimes in
    place. Window means are rounded to two decimals; windows whose mean is 0
    or missing (no rows in the window) are dropped.

    Args:
        dataframe: frame with 'Date' and 'Sales' columns.

    Returns:
        A Series of mean sales indexed by the start date of each window.
    """
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    grouped = dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales'].mean().round(2)
    # ``pd.np`` is no longer available in current pandas; use numpy directly.
    grouped = grouped.replace(0, np.nan).dropna()

    return grouped
|
86 |
+
|
87 |
+
# SARIMAX Model
|
88 |
+
def train_test(dataframe, n):
    """Split a frame into training and test parts, holding out the last ``n`` rows.

    The first column is treated as the target (y) and all remaining columns
    as exogenous features (X).

    Args:
        dataframe: frame whose column 0 is the target.
        n: number of trailing rows reserved for testing; 0 keeps every row
            for training.

    Returns:
        Tuple ``(training_y, test_y, test_y_series, training_X, test_X,
        future_X)`` where ``future_X`` is the feature columns of every row.

    Raises:
        ValueError: if ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative")

    # Use an explicit split position: slicing with ``-n`` breaks for n == 0
    # because ``-0 == 0`` makes ``[:-n]`` empty and ``[-n:]`` the whole frame.
    split = max(len(dataframe) - n, 0)

    training_y = dataframe.iloc[:split, 0]
    test_y = dataframe.iloc[split:, 0]
    test_y_series = pd.Series(test_y, index=test_y.index)
    training_X = dataframe.iloc[:split, 1:]
    test_X = dataframe.iloc[split:, 1:]
    future_X = dataframe.iloc[0:, 1:]
    return (training_y, test_y, test_y_series, training_X, test_X, future_X)
|
96 |
+
|
97 |
+
def model_fitting(dataframe, Exo):
    """Run pmdarima's ``auto_arima`` search on ``dataframe['Sales']``.

    ``Exo`` is passed as the exogenous matrix ``X``. The search is stepwise
    and seasonal with ``m=12`` and ``D=1``; the selected model is returned.
    """
    # NOTE(review): min_p / min_q do not appear to be documented named
    # parameters of auto_arima - confirm how they are consumed.
    search_options = dict(
        start_p=1, start_q=1,
        test='adf', min_p=1, min_q=1,
        max_p=3, max_q=3, m=12,
        start_P=0, seasonal=True,
        d=None, D=1, trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )
    return pm.auto_arima(dataframe['Sales'], X=Exo, **search_options)
|
108 |
+
|
109 |
+
def test_fitting(dataframe, Exo, trainY):
    """Run pmdarima's ``auto_arima`` search on the training target ``trainY``.

    ``Exo`` is passed as the exogenous matrix ``X``. The search settings are
    the same as in ``model_fitting``. ``dataframe`` is accepted but not used.
    """
    # NOTE(review): min_p / min_q do not appear to be documented named
    # parameters of auto_arima - confirm how they are consumed.
    search_options = dict(
        start_p=1, start_q=1,
        test='adf', min_p=1, min_q=1,
        max_p=3, max_q=3, m=12,
        start_P=0, seasonal=True,
        d=None, D=1, trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
    )
    return auto_arima(X=Exo, y=trainY, **search_options)
|
120 |
+
|
121 |
+
def forecast_accuracy(forecast, actual):
    """Compute accuracy metrics of ``forecast`` against ``actual``.

    Both inputs are compared element by element in positional order.

    Args:
        forecast: 1-D sequence (list, numpy array or pandas Series) of
            predicted values.
        actual: 1-D sequence of observed values, same length as ``forecast``.

    Returns:
        Dict with keys 'mape' (mean absolute percentage error as a fraction,
        4 decimals), 'rmse' (2 decimals), 'corr' (Pearson correlation) and
        'min-max' (1 minus the mean of elementwise min/max ratios).
    """
    # Work on plain arrays: pandas Series reject ``series[:, None]`` and
    # align arithmetic on index labels, which mis-pairs a forecast with
    # actuals that carry a different index.
    forecast = np.asarray(forecast, dtype=float)
    actual = np.asarray(actual, dtype=float)
    error = forecast - actual

    mape = np.mean(np.abs(error) / np.abs(actual)).round(4)  # MAPE
    rmse = (np.mean(error ** 2) ** .5).round(2)               # RMSE
    corr = np.corrcoef(forecast, actual)[0, 1]                # corr
    mins = np.minimum(forecast, actual)
    maxs = np.maximum(forecast, actual)
    minmax = 1 - np.mean(mins / maxs)                         # minmax
    return({'mape': mape, 'rmse': rmse, 'corr': corr, 'min-max': minmax})
|
131 |
+
|
132 |
+
def sales_growth(dataframe, fittedValues):
    """Build a table of forecasted sales with period-over-period change.

    Args:
        dataframe: historical frame with a 'Sales' column.
        fittedValues: Series of forecasted sales; its index becomes 'Date'.

    Returns:
        Frame indexed by 'Date' with columns 'Sales' (2 decimals),
        'Forecasted Sales First Difference' and 'Forecasted Sales Growth'
        (percent). The first row has no preceding forecast, so it is filled
        from the last two historical values; it stays NaN when fewer than
        two historical values exist.
    """
    growth = fittedValues.to_frame().reset_index()
    growth.columns = ("Date", "Sales")
    growth = growth.set_index('Date')

    growth['Sales'] = growth['Sales'].round(2)

    # Difference and percentage growth relative to the previous forecast row.
    previous = growth['Sales'].shift(1)
    change = growth['Sales'] - previous
    growth['Forecasted Sales First Difference'] = change.round(2)
    growth['Forecasted Sales Growth'] = ((change / previous) * 100).round(2)

    history = dataframe['Sales']
    if len(growth) > 0 and len(history) >= 2:
        latest = history.iloc[-1]
        prior = history.iloc[-2]
        delta = latest - prior
        # Positional ``.iloc[row, col]`` writes into the frame itself; the
        # chained ``frame[col].iloc[0] = ...`` form may hit a temporary.
        diff_col = growth.columns.get_loc('Forecasted Sales First Difference')
        growth_col = growth.columns.get_loc('Forecasted Sales Growth')
        growth.iloc[0, diff_col] = np.round(delta, 2)
        # Growth is measured against the earlier value (``prior``), matching
        # the column formula above, which divides by the previous row.
        growth.iloc[0, growth_col] = np.round((delta / prior) * 100, 2)

    return growth
|
150 |
+
|
151 |
+
# TAPAS Model
|
152 |
+
model_name = "google/tapas-large-finetuned-wtq"

# ``st.cache`` is deprecated and hashes the returned object to detect
# mutation, which is unsuitable for a model pipeline. Prefer
# ``st.cache_resource`` when this Streamlit version provides it; otherwise
# fall back to ``st.cache`` with output-mutation checks disabled.
_cache_model = (
    st.cache_resource
    if hasattr(st, "cache_resource")
    else st.cache(allow_output_mutation=True)
)


@_cache_model
def load_tapas_model(model_name):
    """Load the TAPAS tokenizer and model and wrap them in a pipeline.

    Args:
        model_name: Hugging Face model identifier to load.

    Returns:
        A "table-question-answering" pipeline; the result is cached so the
        model is not reloaded on every Streamlit rerun.
    """
    tokenizer = TapasTokenizer.from_pretrained(model_name)
    model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
    return pipeline("table-question-answering", model=model, tokenizer=tokenizer)


pipe = load_tapas_model(model_name)
|
161 |
+
|
162 |
+
def get_answer(table, query):
    """Ask the module-level TAPAS pipeline ``query`` about ``table``.

    Returns the pipeline's raw result; its 'coordinates' entry is printed
    for debugging.
    """
    result = pipe(table=table, query=query)
    print(result['coordinates'])  # FOR DEBUGGING PURPOSES
    return result
|
166 |
+
|
167 |
+
def convert_answer(answer):
    """Reduce a TAPAS answer to a single number according to its aggregator.

    Args:
        answer: dict with 'aggregator', 'answer' and 'cells' keys, where
            'cells' is a list of cell strings (thousands separators allowed).

    Returns:
        - 'SUM': the sum of the cells as numbers.
        - 'AVERAGE': the mean of the cells, or NaN when no cells were selected.
        - 'COUNT': the number of selected cells.
        - any other aggregator: ``answer`` unchanged.

    Raises:
        ValueError: if a cell is not numeric for 'SUM' or 'AVERAGE'.
    """
    aggregator = answer['aggregator']
    if aggregator not in ('SUM', 'AVERAGE', 'COUNT'):
        return answer

    print(answer['answer'])  # FOR DEBUGGING
    cells = answer['cells']

    if aggregator == 'COUNT':
        # COUNT means "how many cells were selected"; adding up the cell
        # values would return their sum and fail on non-integer cells.
        return len(cells)

    values = [float(value.replace(',', '')) for value in cells]
    if aggregator == 'SUM':
        return sum(values)

    # AVERAGE: avoid ZeroDivisionError when the model selected no cells.
    if not values:
        return float('nan')
    return sum(values) / len(values)
|
189 |
+
|
190 |
+
def get_converted_answer(table, query):
    """Query TAPAS via ``get_answer`` and reduce the result with ``convert_answer``."""
    raw_answer = get_answer(table, query)
    return convert_answer(raw_answer)
|
193 |
|
194 |
st.title("Sales Forecasting Dashboard")
|
195 |
# Greeting shown under the title; directs the user to the sidebar.
st.write("📈 Welcome User, start using the application by uploading your file in the sidebar!")
|
|
|
199 |
page_icon="📈",
|
200 |
layout="wide",
|
201 |
initial_sidebar_state="expanded",
|
202 |
+
)
|
203 |
|
204 |
+
if 'uploaded' not in st.session_state:
|
205 |
+
st.session_state.uploaded = 'uploaded'
|
206 |
|
207 |
# Sidebar Menu
|
208 |
with st.sidebar:
|
|
|
217 |
df = pd.read_csv(uploaded_file, parse_dates=True)
|
218 |
st.write("Your uploaded data:")
|
219 |
st.write(df)
|
220 |
+
|
221 |
# Data pre-processing
|
222 |
+
df = drop(df)
|
223 |
+
df = date_format(df)
|
224 |
+
merge_sort(df)
|
225 |
+
df = group_to_three(df)
|
226 |
+
st.session_state.uploaded = True
|
227 |
+
|
228 |
with open('sample.csv', 'rb') as f:
|
229 |
+
st.download_button("Download our sample CSV", f, file_name='sample.csv')
|
230 |
+
|
231 |
+
if (st.session_state.uploaded):
|
232 |
+
st.line_chart(df)
|
233 |
+
|
234 |
+
forecast_button_clicked = st.button(
|
235 |
+
'Start Forecasting',
|
236 |
+
key='forecast_button',
|
237 |
+
type="primary",
|
238 |
+
disabled=st.session_state.uploaded,
|
239 |
+
)
|
240 |
+
|
241 |
+
if (forecast_button_clicked):
|
242 |
+
# TODO call arima here
|
243 |
+
pass
|
modules/__init__.py
DELETED
@@ -1 +0,0 @@
|
|
1 |
-
__all__ = ["preprocessor", "arima", "tapas"]
|
|
|
|
modules/__pycache__/__init__.cpython-311.pyc
DELETED
Binary file (236 Bytes)
|
|
modules/__pycache__/arima.cpython-311.pyc
DELETED
Binary file (5.32 kB)
|
|
modules/__pycache__/preprocessor.cpython-311.pyc
DELETED
Binary file (5.09 kB)
|
|
modules/__pycache__/tapas.cpython-311.pyc
DELETED
Binary file (2.91 kB)
|
|
modules/arima.py
DELETED
@@ -1,68 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import pandas as pd
|
3 |
-
from datetime import datetime
|
4 |
-
import pmdarima as pm
|
5 |
-
from pmdarima import auto_arima
|
6 |
-
|
7 |
-
def train_test(dataframe, n):
|
8 |
-
training_y = dataframe.iloc[:-n,0]
|
9 |
-
test_y = dataframe.iloc[-n:,0]
|
10 |
-
test_y_series = pd.Series(test_y, index=dataframe.iloc[-n:, 0].index)
|
11 |
-
training_X = dataframe.iloc[:-n,1:]
|
12 |
-
test_X = dataframe.iloc[-n:,1:]
|
13 |
-
future_X = dataframe.iloc[0:,1:]
|
14 |
-
return (training_y, test_y, test_y_series, training_X, test_X, future_X)
|
15 |
-
|
16 |
-
def model_fitting(dataframe, Exo):
|
17 |
-
futureModel = pm.auto_arima(dataframe['Sales'], X=Exo, start_p=1, start_q=1,
|
18 |
-
test='adf',min_p=1,min_q=1,
|
19 |
-
max_p=3, max_q=3, m=12,
|
20 |
-
start_P=0, seasonal=True,
|
21 |
-
d=None, D=1, trace=True,
|
22 |
-
error_action='ignore',
|
23 |
-
suppress_warnings=True,
|
24 |
-
stepwise=True)
|
25 |
-
model = futureModel
|
26 |
-
return model
|
27 |
-
|
28 |
-
def test_fitting(dataframe, Exo, trainY):
|
29 |
-
trainTestModel = auto_arima(X = Exo, y = trainY, start_p=1, start_q=1,
|
30 |
-
test='adf',min_p=1,min_q=1,
|
31 |
-
max_p=3, max_q=3, m=12,
|
32 |
-
start_P=0, seasonal=True,
|
33 |
-
d=None, D=1, trace=True,
|
34 |
-
error_action='ignore',
|
35 |
-
suppress_warnings=True,
|
36 |
-
stepwise=True)
|
37 |
-
model = trainTestModel
|
38 |
-
return model
|
39 |
-
|
40 |
-
def forecast_accuracy(forecast, actual):
|
41 |
-
mape = np.mean(np.abs(forecast - actual)/np.abs(actual)).round(4) # MAPE
|
42 |
-
rmse = (np.mean((forecast - actual)**2)**.5).round(2) # RMSE
|
43 |
-
corr = np.corrcoef(forecast, actual)[0,1] # corr
|
44 |
-
mins = np.amin(np.hstack([forecast[:,None],
|
45 |
-
actual[:,None]]), axis=1)
|
46 |
-
maxs = np.amax(np.hstack([forecast[:,None],
|
47 |
-
actual[:,None]]), axis=1)
|
48 |
-
minmax = 1 - np.mean(mins/maxs) # minmax
|
49 |
-
return({'mape':mape, 'rmse':rmse, 'corr':corr, 'min-max':minmax})
|
50 |
-
|
51 |
-
def sales_growth(dataframe, fittedValues):
|
52 |
-
sales_growth = fittedValues.to_frame()
|
53 |
-
sales_growth = sales_growth.reset_index()
|
54 |
-
sales_growth.columns = ("Date", "Sales")
|
55 |
-
sales_growth = sales_growth.set_index('Date')
|
56 |
-
|
57 |
-
sales_growth['Sales'] = (sales_growth['Sales']).round(2)
|
58 |
-
|
59 |
-
#Calculate and create the column for sales difference and growth
|
60 |
-
sales_growth['Forecasted Sales First Difference']=(sales_growth['Sales']-sales_growth['Sales'].shift(1)).round(2)
|
61 |
-
sales_growth['Forecasted Sales Growth']=(((sales_growth['Sales']-sales_growth['Sales'].shift(1))/sales_growth['Sales'].shift(1))*100).round(2)
|
62 |
-
|
63 |
-
#Calculate and create the first row for sales difference and growth
|
64 |
-
sales_growth['Forecasted Sales First Difference'].iloc[0] = (dataframe['Sales'].iloc[-1]-dataframe['Sales'].iloc[-2]).round(2)
|
65 |
-
sales_growth['Forecasted Sales Growth'].iloc[0]=(((dataframe['Sales'].iloc[-1]-dataframe['Sales'].iloc[-2])/dataframe['Sales'].iloc[-1])*100).round(2)
|
66 |
-
|
67 |
-
|
68 |
-
return sales_growth
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/preprocessor.py
DELETED
@@ -1,76 +0,0 @@
|
|
1 |
-
import pandas as pd
|
2 |
-
from datetime import datetime
|
3 |
-
|
4 |
-
def merge(B, C, A):
|
5 |
-
i = j = k = 0
|
6 |
-
|
7 |
-
# Convert 'Date' columns to datetime.date objects
|
8 |
-
B['Date'] = pd.to_datetime(B['Date']).dt.date
|
9 |
-
C['Date'] = pd.to_datetime(C['Date']).dt.date
|
10 |
-
A['Date'] = pd.to_datetime(A['Date']).dt.date
|
11 |
-
|
12 |
-
while i < len(B) and j < len(C):
|
13 |
-
if B['Date'].iloc[i] <= C['Date'].iloc[j]:
|
14 |
-
A['Date'].iloc[k] = B['Date'].iloc[i]
|
15 |
-
A['Sales'].iloc[k] = B['Sales'].iloc[i]
|
16 |
-
i += 1
|
17 |
-
|
18 |
-
else:
|
19 |
-
A['Date'].iloc[k] = C['Date'].iloc[j]
|
20 |
-
A['Sales'].iloc[k] = C['Sales'].iloc[j]
|
21 |
-
j += 1
|
22 |
-
k += 1
|
23 |
-
|
24 |
-
while i < len(B):
|
25 |
-
A['Date'].iloc[k] = B['Date'].iloc[i]
|
26 |
-
A['Sales'].iloc[k] = B['Sales'].iloc[i]
|
27 |
-
i += 1
|
28 |
-
k += 1
|
29 |
-
|
30 |
-
while j < len(C):
|
31 |
-
A['Date'].iloc[k] = C['Date'].iloc[j]
|
32 |
-
A['Sales'].iloc[k] = C['Sales'].iloc[j]
|
33 |
-
j += 1
|
34 |
-
k += 1
|
35 |
-
|
36 |
-
return A
|
37 |
-
|
38 |
-
def merge_sort(dataframe):
|
39 |
-
if len(dataframe) > 1:
|
40 |
-
center = len(dataframe) // 2
|
41 |
-
left = dataframe.iloc[:center]
|
42 |
-
right = dataframe.iloc[center:]
|
43 |
-
merge_sort(left)
|
44 |
-
merge_sort(right)
|
45 |
-
|
46 |
-
return merge(left, right, dataframe)
|
47 |
-
|
48 |
-
else:
|
49 |
-
return dataframe
|
50 |
-
|
51 |
-
def drop (dataframe):
|
52 |
-
def get_columns_containing(dataframe, substrings):
|
53 |
-
return [col for col in dataframe.columns if any(substring.lower() in col.lower() for substring in substrings)]
|
54 |
-
|
55 |
-
columns_to_keep = get_columns_containing(dataframe, ["date", "sale"])
|
56 |
-
dataframe = dataframe.drop(columns=dataframe.columns.difference(columns_to_keep))
|
57 |
-
dataframe = dataframe.dropna()
|
58 |
-
|
59 |
-
return dataframe
|
60 |
-
|
61 |
-
def date_format(dataframe):
|
62 |
-
for i, d, s in dataframe.itertuples():
|
63 |
-
dataframe['Date'][i] = dataframe['Date'][i].strip()
|
64 |
-
|
65 |
-
for i, d, s in dataframe.itertuples():
|
66 |
-
new_date = datetime.strptime(dataframe['Date'][i], "%m/%d/%Y").date()
|
67 |
-
dataframe['Date'][i] = new_date
|
68 |
-
|
69 |
-
return dataframe
|
70 |
-
|
71 |
-
def group_to_three(dataframe):
|
72 |
-
dataframe['Date'] = pd.to_datetime(dataframe['Date'])
|
73 |
-
dataframe = dataframe.groupby([pd.Grouper(key='Date', freq='3D')])['Sales'].mean().round(2)
|
74 |
-
dataframe = dataframe.replace(0, pd.np.nan).dropna()
|
75 |
-
|
76 |
-
return dataframe
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
modules/tapas.py
DELETED
@@ -1,43 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
from transformers import pipeline, TapasTokenizer, TapasForQuestionAnswering
|
3 |
-
|
4 |
-
model_name = "google/tapas-large-finetuned-wtq"
|
5 |
-
|
6 |
-
# load the tokenizer and the model from huggingface model hub
|
7 |
-
tokenizer = TapasTokenizer.from_pretrained(model_name)
|
8 |
-
model = TapasForQuestionAnswering.from_pretrained(model_name, local_files_only=False)
|
9 |
-
|
10 |
-
# load the model and tokenizer into a question-answering pipeline
|
11 |
-
pipe = pipeline("table-question-answering", model=model, tokenizer=tokenizer)
|
12 |
-
|
13 |
-
def get_answer(table, query):
|
14 |
-
answers = pipe(table=table, query=query)
|
15 |
-
print(answers['coordinates']) # FOR DEBUGGING PURPOSES
|
16 |
-
return answers
|
17 |
-
|
18 |
-
def convert_answer(answer):
|
19 |
-
if answer['aggregator'] == 'SUM':
|
20 |
-
print(answer['answer']) # FOR DEBUGGING
|
21 |
-
cells = answer['cells']
|
22 |
-
converted = sum(float(value.replace(',', '')) for value in cells)
|
23 |
-
return converted
|
24 |
-
|
25 |
-
if answer['aggregator'] == 'AVERAGE':
|
26 |
-
print(answer['answer']) # FOR DEBUGGING
|
27 |
-
cells = answer['cells']
|
28 |
-
values = [float(value.replace(',', '')) for value in cells]
|
29 |
-
converted = sum(values) / len(values)
|
30 |
-
return converted
|
31 |
-
|
32 |
-
if answer['aggregator'] == 'COUNT':
|
33 |
-
print(answer['answer']) # FOR DEBUGGING
|
34 |
-
cells = answer['cells']
|
35 |
-
converted = sum(int(value.replace(',', '')) for value in cells)
|
36 |
-
return converted
|
37 |
-
|
38 |
-
else:
|
39 |
-
return answer
|
40 |
-
|
41 |
-
def get_converted_answer(table, query):
|
42 |
-
converted_answer = convert_answer(get_answer(table, query))
|
43 |
-
return converted_answer
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|