CodingMaster24 committed on
Commit
46f5562
·
verified ·
1 Parent(s): b90beb9

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -448
app.py DELETED
@@ -1,448 +0,0 @@
1
- import numpy as np
2
- import pandas as pd
3
- import matplotlib.pyplot as plt
4
- from statsmodels.tsa.arima.model import ARIMA
5
- from statsmodels.tsa.stattools import adfuller
6
- from statsmodels.tsa.statespace.sarimax import SARIMAX
7
- from sklearn.model_selection import train_test_split
8
- import matplotlib.image as mpimg
9
- import seaborn as sns
10
- import warnings
11
- import datetime as dt
12
- from sklearn.metrics import confusion_matrix
13
- import matplotlib.dates as mdates
14
- from pandas.tseries.offsets import DateOffset
15
- import streamlit as st
16
- # from pmdarima.arima import auto_arima
17
- from statsmodels.tsa.stattools import adfuller
18
- warnings.filterwarnings('ignore')
19
-
20
- """# Load Generation Data (Plant 1)"""
21
-
22
- from sklearn.model_selection import train_test_split
23
- from pmdarima.arima import auto_arima
24
- import warnings
25
- warnings.filterwarnings('ignore')
26
-
27
- st.title("Solar Plant Data Analysis and Forecasting")
28
-
29
- # File Upload
30
- uploaded_gen = st.file_uploader("Upload Generation Data CSV", type=["csv"], key="gen")
31
- uploaded_weather = st.file_uploader("Upload Weather Sensor Data CSV", type=["csv"], key="weather")
32
-
33
- def load_data(file):  # read an uploaded CSV into a DataFrame; None when nothing was uploaded
34
-     if file is not None:
35
-         return pd.read_csv(file)
36
-     return None
37
-
38
- # Load Data
39
- gen_data = load_data(uploaded_gen)
40
- weather_data = load_data(uploaded_weather)
41
-
42
- default_gen_data = pd.read_csv('Plant_1_Generation_Data.csv')
43
- default_weather_data = pd.read_csv('Plant_1_Weather_Sensor_Data.csv')
44
-
45
- if gen_data is None:
46
-     gen_data = default_gen_data
47
- gen_1 = gen_data  # alias the frame actually in use so an upload never leaves gen_1 undefined
48
- if weather_data is None:
49
-     weather_data = default_weather_data
50
- sens_1 = weather_data  # same aliasing for the weather-sensor frame
51
-
52
- # Data Preview
53
- st.subheader("Generation Data Preview")
54
- st.dataframe(gen_data.head())
55
-
56
- st.subheader("Weather Data Preview")
57
- st.dataframe(weather_data.head())
58
-
59
- st.subheader("Generation Data Preview")
60
- st.dataframe(gen_data.tail())
61
-
62
- st.subheader("Weather Data Preview")
63
- st.dataframe(weather_data.tail())
64
-
65
- st.subheader("Generation Data Preview")
66
- st.dataframe(gen_data.describe())
67
-
68
- st.subheader("Weather Data Preview")
69
- st.dataframe(weather_data.describe())
70
-
71
- # Filter out non-numeric columns
72
- numeric_data = gen_1.select_dtypes(include=['float64', 'int64'])
73
-
74
- # Calculate the correlation matrix on the numeric data
75
- corelation = numeric_data.corr()
76
-
77
- # Plot the heatmap
78
- fig, ax = plt.subplots(figsize=(14, 12))
79
- sns.heatmap(corelation, annot=True, ax=ax)
80
- st.pyplot(fig)
81
-
82
- st.dataframe(sens_1.tail())
83
-
84
- st.dataframe(sens_1.describe())
85
-
86
- # Filter out non-numeric columns
87
- numeric_data = sens_1.select_dtypes(include=['float64', 'int64'])
88
-
89
- # Calculate the correlation matrix on the numeric data
90
- corelation = numeric_data.corr()
91
-
92
- # Plot the heatmap
93
- fig, ax = plt.subplots(figsize=(14, 12))
94
- sns.heatmap(corelation, annot=True, ax=ax)
95
- st.pyplot(fig)
96
-
97
- """# Format 'DATE_TIME' column to datetime"""
98
-
99
- gen_data['DATE_TIME'] = pd.to_datetime(gen_data['DATE_TIME'], format='%d-%m-%Y %H:%M')
100
- weather_data['DATE_TIME'] = pd.to_datetime(weather_data['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')
101
-
102
- gen_1['DATE_TIME']= pd.to_datetime(gen_1['DATE_TIME'],format='%d-%m-%Y %H:%M')
103
- sens_1['DATE_TIME']= pd.to_datetime(sens_1['DATE_TIME'],format='%Y-%m-%d %H:%M:%S')
104
-
105
- """# Daily Yield & AC/DC Power from Generation Data"""
106
-
107
- gen_data_daily = gen_data.set_index('DATE_TIME').resample('D').sum(numeric_only=True).reset_index()  # aggregate numeric columns only
108
-
109
- """# Plot Daily Yield and AC/DC Power"""
110
-
111
- df_gen = gen_1.groupby('DATE_TIME').sum(numeric_only=True).reset_index()  # aggregate numeric columns only
112
- df_gen['time'] = df_gen['DATE_TIME'].dt.time
113
-
114
- # Create figure and axes
115
- fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
116
-
117
- # Daily yield plot
118
- df_gen.plot(x='DATE_TIME', y='DAILY_YIELD', color='navy', ax=ax[0])
119
- ax[0].set_title('Daily yield')
120
- ax[0].set_ylabel('kW', color='navy', fontsize=17)
121
-
122
- # AC & DC power plot
123
- df_gen.set_index('time').drop('DATE_TIME', axis=1)[['AC_POWER', 'DC_POWER']].plot(style='o', ax=ax[1])
124
- ax[1].set_title('AC power & DC power during day hours')
125
-
126
- # Display in Streamlit
127
- st.pyplot(fig)
128
-
129
- # Create another figure for additional plots
130
- fig2, ax2 = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
131
-
132
- # Daily and Total Yield plot
133
- gen_data.plot(x='DATE_TIME', y=['DAILY_YIELD', 'TOTAL_YIELD'], ax=ax2[0], title="Daily and Total Yield (Generation Data)")
134
-
135
- # AC Power & DC Power plot
136
- gen_data.plot(x='DATE_TIME', y=['AC_POWER', 'DC_POWER'], ax=ax2[1], title="AC Power & DC Power (Generation Data)")
137
-
138
- # Display the second figure in Streamlit
139
- st.pyplot(fig2)
140
-
141
- # Create a copy and extract the date
142
- daily_gen = df_gen.copy()
143
- daily_gen['date'] = daily_gen['DATE_TIME'].dt.date
144
-
145
- # Group by 'date' and sum only the numerical columns
146
- daily_gen = daily_gen.groupby('date').sum(numeric_only=True)
147
-
148
- # Plot the daily and total yield
149
- fig, ax = plt.subplots(ncols=2, dpi=100, figsize=(20, 5))
150
- daily_gen['DAILY_YIELD'].plot(ax=ax[0], color='navy')
151
- daily_gen['TOTAL_YIELD'].plot(kind='bar', ax=ax[1], color='navy')
152
-
153
- fig.autofmt_xdate(rotation=45)
154
- ax[0].set_title('Daily Yield')
155
- ax[1].set_title('Total Yield')
156
- ax[0].set_ylabel('kW', color='navy', fontsize=17)
157
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
158
-
159
- # Group by 'DATE_TIME' and sum
160
- df_sens = sens_1.groupby('DATE_TIME').sum(numeric_only=True).reset_index()  # aggregate numeric columns only
161
- df_sens['time'] = df_sens['DATE_TIME'].dt.time
162
-
163
- # Plotting
164
- fig, ax = plt.subplots(ncols=2, nrows=1, dpi=100, figsize=(20, 5))
165
-
166
- # Irradiation plot
167
- df_sens.plot(x='time', y='IRRADIATION', ax=ax[0], style='o')
168
-
169
- # Ambient and Module Temperature plot
170
- df_sens.set_index('DATE_TIME').drop('time', axis=1)[['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE']].plot(ax=ax[1])
171
-
172
- # Setting titles and labels
173
- ax[0].set_title('Irradiation during day hours')
174
- ax[1].set_title('Ambient and Module Temperature')
175
- ax[0].set_ylabel('W/m²', color='navy', fontsize=17)
176
- ax[1].set_ylabel('°C', color='navy', fontsize=17)
177
-
178
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
179
-
180
- """# % of DC power converted to AC power"""
181
-
182
- # Create a copy of the data
183
- loss = gen_1.copy()
184
-
185
- # Create a new 'day' column containing only the date part from 'DATE_TIME'
186
- loss['day'] = loss['DATE_TIME'].dt.date
187
-
188
- # Drop the 'DATE_TIME' column to prevent summing over datetime values
189
- loss = loss.drop(columns=['DATE_TIME'])
190
-
191
- # Group by 'day' and sum only numeric columns
192
- loss = loss.groupby('day').sum(numeric_only=True)
193
-
194
- # Calculate the percentage of DC power converted to AC power
195
- loss['losses'] = (loss['AC_POWER'] / loss['DC_POWER']) * 100
196
-
197
- # Plot the losses
198
- loss['losses'].plot(style='o--', figsize=(17, 5), label='Real Power')
199
-
200
- # Plot styling
201
- plt.title('% of DC power converted to AC power', size=17)
202
- plt.ylabel('DC power converted (%)', fontsize=14, color='red')
203
- plt.axhline(loss['losses'].mean(), linestyle='--', color='gray', label='mean')
204
- plt.legend()
205
- plt.show()
206
-
207
- """# DC Power"""
208
-
209
- sources=gen_1.copy()
210
- sources['time']=sources['DATE_TIME'].dt.time
211
- sources.set_index('time').groupby('SOURCE_KEY')['DC_POWER'].plot(style='o',legend=True,figsize=(20,10))
212
- plt.title('DC Power during day for all sources',size=17)
213
- plt.ylabel('DC POWER ( kW )',color='navy',fontsize=17)
214
- plt.show()
215
-
216
- """# DC POWER ( kW )"""
217
-
218
- dc_gen=gen_1.copy()
219
- dc_gen['time']=dc_gen['DATE_TIME'].dt.time
220
- dc_gen=dc_gen.groupby(['time','SOURCE_KEY'])['DC_POWER'].mean().unstack()
221
-
222
- cmap = sns.color_palette("Spectral", n_colors=12)
223
-
224
- fig,ax=plt.subplots(ncols=2,nrows=1,dpi=100,figsize=(20,6))
225
- dc_gen.iloc[:,0:11].plot(ax=ax[0],color=cmap)
226
- dc_gen.iloc[:,11:22].plot(ax=ax[1],color=cmap)
227
-
228
- ax[0].set_title('First 11 sources')
229
- ax[0].set_ylabel('DC POWER ( kW )',fontsize=17,color='navy')
230
- ax[1].set_title('Last 11 sources')
231
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
232
-
233
- """# Irradiation, Ambient and Module Temperature from Weather Data"""
234
-
235
- fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
236
- weather_data.plot(x='DATE_TIME', y='IRRADIATION', ax=ax[0], title="Irradiation (Weather Data)")
237
- weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax[1], title="Ambient & Module Temperature (Weather Data)")
238
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
239
-
240
- """# Real DC power converted (DC Power efficiency)"""
241
-
242
- gen_data['DC_POWER_CONVERTED'] = gen_data['DC_POWER'] * 0.98 # Assume 2% loss in conversion
243
- fig, ax = plt.subplots(figsize=(15, 5))
244
- gen_data.plot(x='DATE_TIME', y='DC_POWER_CONVERTED', ax=ax, title="DC Power Converted")
245
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
246
-
247
- """# DC Power generated during day hours (Generation Data)"""
248
-
249
- day_data_gen = gen_data[(gen_data['DATE_TIME'].dt.hour >= 6) & (gen_data['DATE_TIME'].dt.hour <= 18)]
250
- fig, ax = plt.subplots(figsize=(15, 5))
251
- day_data_gen.plot(x='DATE_TIME', y='DC_POWER', ax=ax, title="DC Power Generated During Day Hours")
252
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
253
-
254
- """# DC Power And Daily Yield"""
255
-
256
- temp1_gen=gen_1.copy()
257
-
258
- temp1_gen['time']=temp1_gen['DATE_TIME'].dt.time
259
- temp1_gen['day']=temp1_gen['DATE_TIME'].dt.date
260
-
261
-
262
- temp1_sens=sens_1.copy()
263
-
264
- temp1_sens['time']=temp1_sens['DATE_TIME'].dt.time
265
- temp1_sens['day']=temp1_sens['DATE_TIME'].dt.date
266
-
267
- # just for columns
268
- cols=temp1_gen.groupby(['time','day'])['DC_POWER'].mean().unstack()
269
-
270
- ax =temp1_gen.groupby(['time','day'])['DC_POWER'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30))
271
- temp1_gen.groupby(['time','day'])['DAILY_YIELD'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,20),style='-.',ax=ax)
272
-
273
- i=0
274
- for a in range(len(ax)):
275
-     for b in range(len(ax[a])):
276
-         ax[a,b].set_title(cols.columns[i],size=15)
277
-         ax[a,b].legend(['DC_POWER','DAILY_YIELD'])
278
-         i=i+1
279
-
280
- plt.tight_layout()
281
- plt.show()
282
-
283
- """# Module Temperature And Ambient Temperature"""
284
-
285
- ax= temp1_sens.groupby(['time','day'])['MODULE_TEMPERATURE'].mean().unstack().plot(subplots=True,layout=(17,2),figsize=(20,30))
286
- temp1_sens.groupby(['time','day'])['AMBIENT_TEMPERATURE'].mean().unstack().plot(subplots=True,layout=(17,2),figsize=(20,40),style='-.',ax=ax)
287
-
288
- i=0
289
- for a in range(len(ax)):
290
-     for b in range(len(ax[a])):
291
-         ax[a,b].axhline(50)
292
-         ax[a,b].set_title(cols.columns[i],size=15)
293
-         ax[a,b].legend(['Module Temperature','Ambient Temperature'])
294
-         i=i+1
295
-
296
- plt.tight_layout()
297
- plt.show()
298
-
299
- """# DC_POWER And DAILY_YIELD"""
300
-
301
- worst_source=gen_1[gen_1['SOURCE_KEY']=='bvBOhCH3iADSZry'].copy()  # explicit copy before adding columns
302
- worst_source['time']=worst_source['DATE_TIME'].dt.time
303
- worst_source['day']=worst_source['DATE_TIME'].dt.date
304
-
305
- ax=worst_source.groupby(['time','day'])['DC_POWER'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30))
306
- worst_source.groupby(['time','day'])['DAILY_YIELD'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30),ax=ax,style='-.')
307
-
308
- i=0
309
- for a in range(len(ax)):
310
-     for b in range(len(ax[a])):
311
-         ax[a,b].set_title(cols.columns[i],size=15)
312
-         ax[a,b].legend(['DC_POWER','DAILY_YIELD'])
313
-         i=i+1
314
-
315
- plt.tight_layout()
316
- plt.show()
317
-
318
- """# Inverter Analysis (Generation Data)"""
319
-
320
- inverter_performance = gen_data.groupby('SOURCE_KEY')['DC_POWER'].mean().sort_values()
321
- print(f"Underperforming inverter: {inverter_performance.idxmin()}")
322
-
323
- """# Module temperature and Ambient Temperature on PLANT_1 (Weather Data)"""
324
-
325
- fig, ax = plt.subplots(figsize=(15, 5))
326
- weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax, title="Module and Ambient Temperature (Weather Data)")
327
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
328
-
329
- """# Inverter in action (Generation Data)"""
330
-
331
- inverter_data = gen_data[gen_data['SOURCE_KEY'] == 'bvBOhCH3iADSZry']
332
- fig, ax = plt.subplots(figsize=(15, 5))
333
- inverter_data.plot(x='DATE_TIME', y=['AC_POWER', 'DC_POWER'], ax=ax, title="Inverter bvBOhCH3iADSZry")
334
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
335
-
336
- """# Forecasting with ARIMA (Generation Data)"""
337
-
338
- df_daily_gen = gen_data_daily[['DATE_TIME', 'DAILY_YIELD']].set_index('DATE_TIME')
339
-
340
- """# Testing for stationarity"""
341
-
342
- result = adfuller(df_daily_gen['DAILY_YIELD'].dropna())
343
- print(f'ADF Statistic: {result[0]}')
344
- print(f'p-value: {result[1]}')
345
-
346
- """# Splitting the dataset"""
347
-
348
- train_gen, test_gen = train_test_split(df_daily_gen, test_size=0.2, shuffle=False)
349
-
350
- """# ARIMA model"""
351
-
352
- arima_model_gen = ARIMA(train_gen['DAILY_YIELD'], order=(5, 1, 0))
353
- arima_fit_gen = arima_model_gen.fit()
354
- forecast_arima_gen = arima_fit_gen.forecast(steps=len(test_gen))
355
- test_gen['Forecast_ARIMA'] = forecast_arima_gen
356
-
357
- """# Plot ARIMA Forecast"""
358
-
359
- fig, ax = plt.subplots(figsize=(15, 5))
360
- train_gen['DAILY_YIELD'].plot(ax=ax, label='Training Data')
361
- test_gen['DAILY_YIELD'].plot(ax=ax, label='Test Data')
362
- test_gen['Forecast_ARIMA'].plot(ax=ax, label='ARIMA Forecast')
363
- plt.legend()
364
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
365
-
366
- """# SARIMA Model for Seasonal Data"""
367
-
368
- sarima_model = SARIMAX(train_gen['DAILY_YIELD'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
369
- sarima_fit = sarima_model.fit(disp=False)
370
- sarima_forecast = sarima_fit.forecast(steps=len(test_gen))
371
- test_gen['Forecast_SARIMA'] = sarima_forecast
372
-
373
- """# Plot SARIMA Forecast"""
374
-
375
- plt.figure(figsize=(15, 5))
376
- train_gen['DAILY_YIELD'].plot(label='Train')
377
- test_gen['DAILY_YIELD'].plot(label='Test')
378
- test_gen['Forecast_SARIMA'].plot(label='SARIMA Forecast')
379
- plt.legend()
380
- plt.title('SARIMA Model Forecast for Daily Yield (Generation Data)')
381
- plt.show()
382
-
383
- """# SARIMAX vs ARIMA Comparison (Generation Data)"""
384
-
385
- plt.figure(figsize=(15, 5))
386
- plt.plot(test_gen.index, test_gen['DAILY_YIELD'], label='Actual Test Data')
387
- plt.plot(test_gen.index, test_gen['Forecast_ARIMA'], label='ARIMA Forecast')
388
- plt.plot(test_gen.index, test_gen['Forecast_SARIMA'], label='SARIMA Forecast')
389
- plt.legend()
390
- plt.title("ARIMA vs SARIMA Forecast Comparison (Generation Data)")
391
- plt.savefig('first_plot.png', dpi=300, bbox_inches='tight')
392
- plt.show()
393
- plt.close()
394
-
395
- """# ARIMA Model"""
396
-
397
- pred_gen=gen_1.copy()
398
- pred_gen=pred_gen.groupby('DATE_TIME').sum(numeric_only=True)  # aggregate numeric columns only
399
- pred_gen=pred_gen['DAILY_YIELD'][-288:].reset_index()
400
- pred_gen.set_index('DATE_TIME',inplace=True)
401
- pred_gen.head()
402
-
403
- result = adfuller(pred_gen['DAILY_YIELD'])
404
- print('Augmented Dickey-Fuller Test:')
405
- labels = ['ADF Test Statistic','p-value','#Lags Used','Number of Observations Used']
406
-
407
- for value,label in zip(result,labels):
408
-     print(label+' : '+str(value) )
409
-
410
- if result[1] <= 0.05:
411
-     print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
412
- else:
413
-     print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
414
-
415
- train=pred_gen[:192]
416
- test=pred_gen[-96:]
417
- plt.figure(figsize=(15,5))
418
- plt.plot(train,label='Train',color='navy')
419
- plt.plot(test,label='Test',color='darkorange')
420
- plt.title('Last 4 days of daily yield',fontsize=17)
421
- plt.legend()
422
- plt.show()
423
-
424
- arima_model = auto_arima(train,start_p=0,d=1,start_q=0,max_p=4,max_d=4,max_q=4,start_P=0,D=1,start_Q=0,max_P=1,max_D=1,max_Q=1,m=96,seasonal=True,error_action='warn',trace=True,suppress_warnings=True,stepwise=True,random_state=20,n_fits=1)
425
-
426
- future_dates = [test.index[-1] + DateOffset(minutes=x) for x in range(0,2910,15) ]
427
-
428
- prediction=pd.DataFrame(arima_model.predict(n_periods=96),index=test.index)
429
- prediction.columns=['predicted_yield']
430
-
431
- fig,ax= plt.subplots(ncols=2,nrows=1,dpi=100,figsize=(17,5))
432
- ax[0].plot(train,label='Train',color='navy')
433
- ax[0].plot(test,label='Test',color='darkorange')
434
- ax[0].plot(prediction,label='Prediction',color='green')
435
- ax[0].legend()
436
- ax[0].set_title('Forecast on test set',size=17)
437
- ax[0].set_ylabel('kW',color='navy',fontsize=17)
438
-
439
-
440
- f_prediction=pd.DataFrame(arima_model.predict(n_periods=194),index=future_dates)
441
- f_prediction.columns=['predicted_yield']
442
- ax[1].plot(pred_gen,label='Original data',color='navy')
443
- ax[1].plot(f_prediction,label='18th & 19th June',color='green')
444
- ax[1].legend()
445
- ax[1].set_title('Next days forecast',size=17)
446
- st.pyplot(fig)  # plt.show() renders nothing inside a Streamlit app
447
-
448
- arima_model.summary()