Spaces:

CodingMaster24
/

SolarAnalysis

Sleeping

App Files Files Community

SolarAnalysis / app.py

CodingMaster24

Rename solaranalysis.py to app.py

fcdeb41 verified 9 months ago

raw

history blame

15.2 kB

	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	from statsmodels.tsa.arima.model import ARIMA
	from statsmodels.tsa.stattools import adfuller
	from statsmodels.tsa.statespace.sarimax import SARIMAX
	from sklearn.model_selection import train_test_split
	import matplotlib.image as mpimg
	import seaborn as sns
	import warnings
	import datetime as dt
	from sklearn.metrics import confusion_matrix
	import matplotlib.dates as mdates
	from pandas.tseries.offsets import DateOffset
	import streamlit as st
	from pmdarima.arima import auto_arima
	from statsmodels.tsa.stattools import adfuller
	warnings.filterwarnings('ignore')

	"""# Load Generation Data (Plant 1)"""

	from sklearn.model_selection import train_test_split
	from pmdarima.arima import auto_arima
	import warnings
	warnings.filterwarnings('ignore')

	# Page title.
	st.title("Solar Plant Data Analysis and Forecasting")

	# File Upload: two CSV-only uploaders, distinguished by their widget keys.
	csv_types = ["csv"]
	uploaded_gen = st.file_uploader(
	    "Upload Generation Data CSV",
	    type=csv_types,
	    key="gen",
	)
	uploaded_weather = st.file_uploader(
	    "Upload Weather Sensor Data CSV",
	    type=csv_types,
	    key="weather",
	)

	def load_data(file):
	    """Read a CSV source into a DataFrame.

	    Args:
	        file: Anything ``pd.read_csv`` accepts (path or file-like object),
	            or ``None`` when no file was provided.

	    Returns:
	        The parsed ``pandas.DataFrame``, or ``None`` when ``file`` is ``None``.
	    """
	    if file is None:
	        return None
	    return pd.read_csv(file)

	# Load Data
	gen_data = load_data(uploaded_gen)
	weather_data = load_data(uploaded_weather)

	# Read the default Plant 1 CSVs only when nothing was uploaded, so a run
	# with uploads does not depend on the default files being present.
	if gen_data is None:
	    gen_data = pd.read_csv('Plant_1_Generation_Data.csv')
	if weather_data is None:
	    weather_data = pd.read_csv('Plant_1_Weather_Sensor_Data.csv')

	# gen_1 / sens_1 are the names most analysis sections read from. Bind them
	# to the selected frames so uploaded files are analysed too, instead of
	# always pointing at the default data.
	gen_1 = gen_data
	sens_1 = weather_data

	# Data Preview: first rows, last rows, then summary statistics; each view is
	# rendered for the generation data and then for the weather data.
	preview_targets = (
	    ("Generation Data Preview", gen_data),
	    ("Weather Data Preview", weather_data),
	)
	for view_name in ("head", "tail", "describe"):
	    for heading, frame in preview_targets:
	        st.subheader(heading)
	        st.dataframe(getattr(frame, view_name)())

	# Correlation heatmap over the float64/int64 columns of the generation data.
	numeric_dtypes = ['float64', 'int64']
	corelation = gen_1.select_dtypes(include=numeric_dtypes).corr()
	fig, ax = plt.subplots(figsize=(14, 12))
	sns.heatmap(corelation, ax=ax, annot=True)
	st.pyplot(fig)

	# Tail and summary statistics of the weather sensor data.
	for sensor_view in (sens_1.tail(), sens_1.describe()):
	    st.dataframe(sensor_view)

	# Same heatmap for the weather sensor data.
	numeric_data = sens_1.select_dtypes(include=numeric_dtypes)
	corelation = numeric_data.corr()
	fig, ax = plt.subplots(figsize=(14, 12))
	sns.heatmap(corelation, ax=ax, annot=True)
	st.pyplot(fig)

	"""# Format 'DATE_TIME' column to datetime"""

	# Parse DATE_TIME in place. Generation timestamps use day-month-year with
	# minutes; weather timestamps use year-month-day with seconds.
	GEN_TIME_FORMAT = '%d-%m-%Y %H:%M'
	WEATHER_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
	for frame, time_format in (
	    (gen_data, GEN_TIME_FORMAT),
	    (weather_data, WEATHER_TIME_FORMAT),
	    (gen_1, GEN_TIME_FORMAT),
	    (sens_1, WEATHER_TIME_FORMAT),
	):
	    frame['DATE_TIME'] = pd.to_datetime(frame['DATE_TIME'], format=time_format)

	"""# Daily Yield & AC/DC Power from Generation Data"""

	# Calendar-day totals. Only numeric columns are summed (matching the
	# per-date aggregation later in this script) so the string SOURCE_KEY column
	# is not aggregated along with the measurements.
	gen_data_daily = (
	    gen_data.set_index('DATE_TIME')
	    .resample('D')
	    .sum(numeric_only=True)
	    .reset_index()
	)

	"""# Plot Daily Yield and AC/DC Power"""

	# Plant-wide totals per timestamp, plus the time of day for the scatter plot.
	df_gen = gen_1.groupby('DATE_TIME').sum(numeric_only=True).reset_index()
	df_gen['time'] = df_gen['DATE_TIME'].dt.time

	# Create figure and axes
	fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

	# Daily yield plot
	df_gen.plot(x='DATE_TIME', y='DAILY_YIELD', color='navy', ax=ax[0])
	ax[0].set_title('Daily yield')
	ax[0].set_ylabel('kW', color='navy', fontsize=17)

	# AC & DC power plot, indexed by time of day
	df_gen.set_index('time')[['AC_POWER', 'DC_POWER']].plot(style='o', ax=ax[1])
	ax[1].set_title('AC power & DC power during day hours')

	# Display in Streamlit
	st.pyplot(fig)

	# Second figure: two stacked panels drawn directly from gen_data.
	fig2, ax2 = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
	panel_specs = (
	    (['DAILY_YIELD', 'TOTAL_YIELD'], "Daily and Total Yield (Generation Data)"),
	    (['AC_POWER', 'DC_POWER'], "AC Power & DC Power (Generation Data)"),
	)
	for panel, (series_names, panel_title) in zip(ax2, panel_specs):
	    gen_data.plot(x='DATE_TIME', y=series_names, ax=panel, title=panel_title)

	# Render the finished figure in the app.
	st.pyplot(fig2)

	# Aggregate the per-timestamp totals to one row per calendar date.
	daily_gen = df_gen.copy()
	daily_gen['date'] = daily_gen['DATE_TIME'].dt.date
	daily_gen = daily_gen.groupby('date').sum(numeric_only=True)

	# Daily yield as a line, total yield as bars.
	fig, ax = plt.subplots(ncols=2, dpi=100, figsize=(20, 5))
	daily_gen['DAILY_YIELD'].plot(ax=ax[0], color='navy')
	daily_gen['TOTAL_YIELD'].plot(kind='bar', ax=ax[1], color='navy')

	fig.autofmt_xdate(rotation=45)
	ax[0].set_title('Daily Yield')
	ax[1].set_title('Total Yield')
	ax[0].set_ylabel('kW', color='navy', fontsize=17)

	# plt.show() renders nothing inside a Streamlit app; hand the figure to
	# Streamlit as the earlier figures in this script do.
	st.pyplot(fig)

	# Group by 'DATE_TIME' and sum, then keep the time of day for the scatter plot
	df_sens = sens_1.groupby('DATE_TIME').sum().reset_index()
	df_sens['time'] = df_sens['DATE_TIME'].dt.time

	fig, ax = plt.subplots(ncols=2, nrows=1, dpi=100, figsize=(20, 5))

	# Irradiation against time of day
	df_sens.plot(x='time', y='IRRADIATION', ax=ax[0], style='o')

	# Ambient and module temperature over the whole period
	df_sens.set_index('DATE_TIME')[['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE']].plot(ax=ax[1])

	# Titles and axis labels
	ax[0].set_title('Irradiation during day hours')
	ax[1].set_title('Ambient and Module Temperature')
	ax[0].set_ylabel('W/m²', color='navy', fontsize=17)
	ax[1].set_ylabel('°C', color='navy', fontsize=17)

	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# % of DC power converted to AC power"""

	# Per-day totals of the numeric generation columns.
	loss = gen_1.copy()
	loss['day'] = loss['DATE_TIME'].dt.date
	# Drop 'DATE_TIME' so the aggregation does not try to sum datetime values.
	loss = loss.drop(columns=['DATE_TIME'])
	loss = loss.groupby('day').sum(numeric_only=True)

	# Percentage of DC power converted to AC power
	loss['losses'] = (loss['AC_POWER'] / loss['DC_POWER']) * 100

	# Draw on a dedicated figure: a Series plot without an explicit `ax` reuses
	# whatever axes are current, i.e. the previous section's figure.
	fig, ax = plt.subplots(figsize=(17, 5))
	loss['losses'].plot(style='o--', ax=ax, label='Real Power')

	ax.set_title('% of DC power converted to AC power', size=17)
	ax.set_ylabel('DC power converted (%)', fontsize=14, color='red')
	ax.axhline(loss['losses'].mean(), linestyle='--', color='gray', label='mean')
	ax.legend()

	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# DC Power"""

	# DC power of every source against time of day, all on one axes.
	sources = gen_1.copy()
	sources['time'] = sources['DATE_TIME'].dt.time

	# Use a dedicated figure so the per-source series do not land on the
	# previous section's axes.
	fig, ax = plt.subplots(figsize=(20, 10))
	sources.set_index('time').groupby('SOURCE_KEY')['DC_POWER'].plot(style='o', legend=True, ax=ax)
	ax.set_title('DC Power during day for all sources', size=17)
	ax.set_ylabel('DC POWER ( kW )', color='navy', fontsize=17)

	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# DC POWER ( kW )"""

	# Mean DC power per time of day, one column per source.
	dc_gen = gen_1.copy()
	dc_gen['time'] = dc_gen['DATE_TIME'].dt.time
	dc_gen = dc_gen.groupby(['time', 'SOURCE_KEY'])['DC_POWER'].mean().unstack()

	cmap = sns.color_palette("Spectral", n_colors=12)

	# Columns 0-10 on the left panel, columns 11-21 on the right panel.
	fig, ax = plt.subplots(ncols=2, nrows=1, dpi=100, figsize=(20, 6))
	dc_gen.iloc[:, 0:11].plot(ax=ax[0], color=cmap)
	dc_gen.iloc[:, 11:22].plot(ax=ax[1], color=cmap)

	ax[0].set_title('First 11 sources')
	ax[0].set_ylabel('DC POWER ( kW )', fontsize=17, color='navy')
	ax[1].set_title('Last 11 sources')

	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Irradiation, Ambient and Module Temperature from Weather Data"""

	# Irradiation on the top panel, both temperatures on the bottom panel.
	fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
	weather_data.plot(x='DATE_TIME', y='IRRADIATION', ax=ax[0], title="Irradiation (Weather Data)")
	weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax[1], title="Ambient & Module Temperature (Weather Data)")
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Real DC power converted (DC Power efficiency)"""

	# Adds a DC_POWER_CONVERTED column to gen_data in place.
	gen_data['DC_POWER_CONVERTED'] = gen_data['DC_POWER'] * 0.98 # Assume 2% loss in conversion
	fig, ax = plt.subplots(figsize=(15, 5))
	gen_data.plot(x='DATE_TIME', y='DC_POWER_CONVERTED', ax=ax, title="DC Power Converted")
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# DC Power generated during day hours (Generation Data)"""

	# Keep readings whose hour is between 6 and 18 inclusive.
	reading_hour = gen_data['DATE_TIME'].dt.hour
	day_data_gen = gen_data[(reading_hour >= 6) & (reading_hour <= 18)]
	fig, ax = plt.subplots(figsize=(15, 5))
	day_data_gen.plot(x='DATE_TIME', y='DC_POWER', ax=ax, title="DC Power Generated During Day Hours")
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# DC Power And Daily Yield"""

	# Working copies with separate time-of-day and calendar-day columns.
	temp1_gen = gen_1.copy()
	temp1_gen['time'] = temp1_gen['DATE_TIME'].dt.time
	temp1_gen['day'] = temp1_gen['DATE_TIME'].dt.date

	temp1_sens = sens_1.copy()
	temp1_sens['time'] = temp1_sens['DATE_TIME'].dt.time
	temp1_sens['day'] = temp1_sens['DATE_TIME'].dt.date

	# Mean value per time of day, one column per day.
	cols = temp1_gen.groupby(['time', 'day'])['DC_POWER'].mean().unstack()
	daily_yield_by_day = temp1_gen.groupby(['time', 'day'])['DAILY_YIELD'].mean().unstack()

	# Size the two-column grid from the number of days in the data instead of
	# assuming exactly 34 days (17 x 2).
	n_days = len(cols.columns)
	n_rows = -(-n_days // 2)  # ceil(n_days / 2) without importing math
	ax = cols.plot(sharex=True, subplots=True, layout=(n_rows, 2), figsize=(20, 30))

	# Overlay the daily yield on the same axes. Pandas requires exactly one
	# axes per column, so drop the spare cell of an odd-sized grid.
	day_axes = ax.flatten()[:n_days]
	daily_yield_by_day.plot(subplots=True, style='-.', ax=day_axes)

	# Title every panel with the day it shows.
	for day_ax, day in zip(day_axes, cols.columns):
	    day_ax.set_title(day, size=15)
	    day_ax.legend(['DC_POWER', 'DAILY_YIELD'])

	fig = day_axes[0].get_figure()
	fig.tight_layout()
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Module Temperature And Ambient Temperature"""

	# Mean temperatures per time of day, one column per day.
	module_temp_by_day = temp1_sens.groupby(['time', 'day'])['MODULE_TEMPERATURE'].mean().unstack()
	ambient_temp_by_day = temp1_sens.groupby(['time', 'day'])['AMBIENT_TEMPERATURE'].mean().unstack()

	# Size the two-column grid from the sensor data's own day count instead of
	# assuming exactly 34 days (17 x 2).
	n_sensor_days = len(module_temp_by_day.columns)
	n_sensor_rows = -(-n_sensor_days // 2)  # ceil(n_sensor_days / 2)
	ax = module_temp_by_day.plot(subplots=True, layout=(n_sensor_rows, 2), figsize=(20, 30))

	# Pandas requires exactly one axes per column, so drop the spare cell of an
	# odd-sized grid before overlaying the ambient temperature.
	sensor_axes = ax.flatten()[:n_sensor_days]
	ambient_temp_by_day.plot(subplots=True, style='-.', ax=sensor_axes)

	# Titles come from this data's own day columns, not from the generation data.
	for day_ax, day in zip(sensor_axes, module_temp_by_day.columns):
	    day_ax.axhline(50)  # horizontal reference line at 50
	    day_ax.set_title(day, size=15)
	    day_ax.legend(['Module Temperature', 'Ambient Temperature'])

	fig = sensor_axes[0].get_figure()
	fig.tight_layout()
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# DC_POWER And DAILY_YIELD"""

	# Copy the filtered rows so the new columns are added to an independent
	# frame rather than to a slice of gen_1.
	worst_source = gen_1[gen_1['SOURCE_KEY'] == 'bvBOhCH3iADSZry'].copy()
	worst_source['time'] = worst_source['DATE_TIME'].dt.time
	worst_source['day'] = worst_source['DATE_TIME'].dt.date

	# Mean value per time of day, one column per day this source reported.
	worst_dc_by_day = worst_source.groupby(['time', 'day'])['DC_POWER'].mean().unstack()
	worst_yield_by_day = worst_source.groupby(['time', 'day'])['DAILY_YIELD'].mean().unstack()

	# Size the two-column grid from this source's own day count instead of
	# assuming exactly 34 days (17 x 2).
	n_worst_days = len(worst_dc_by_day.columns)
	n_worst_rows = -(-n_worst_days // 2)  # ceil(n_worst_days / 2)
	ax = worst_dc_by_day.plot(sharex=True, subplots=True, layout=(n_worst_rows, 2), figsize=(20, 30))

	# Pandas requires exactly one axes per column, so drop the spare cell of an
	# odd-sized grid before overlaying the daily yield.
	worst_axes = ax.flatten()[:n_worst_days]
	worst_yield_by_day.plot(subplots=True, style='-.', ax=worst_axes)

	# Titles come from this source's own day columns.
	for day_ax, day in zip(worst_axes, worst_dc_by_day.columns):
	    day_ax.set_title(day, size=15)
	    day_ax.legend(['DC_POWER', 'DAILY_YIELD'])

	fig = worst_axes[0].get_figure()
	fig.tight_layout()
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Inverter Analysis (Generation Data)"""

	# Mean DC power per source, sorted ascending.
	inverter_performance = gen_data.groupby('SOURCE_KEY')['DC_POWER'].mean().sort_values()
	# Show the result in the app; print() only reaches the server console.
	st.write(f"Underperforming inverter: {inverter_performance.idxmin()}")

	"""# Module temperature and Ambient Temperature on PLANT_1 (Weather Data)"""

	# Ambient and module temperature over time on a single axes.
	fig, ax = plt.subplots(figsize=(15, 5))
	weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax, title="Module and Ambient Temperature (Weather Data)")
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Inverter in action (Generation Data)"""

	# AC and DC power over time for the single source 'bvBOhCH3iADSZry'.
	inverter_data = gen_data[gen_data['SOURCE_KEY'] == 'bvBOhCH3iADSZry']
	fig, ax = plt.subplots(figsize=(15, 5))
	inverter_data.plot(x='DATE_TIME', y=['AC_POWER', 'DC_POWER'], ax=ax, title="Inverter bvBOhCH3iADSZry")
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# Forecasting with ARIMA (Generation Data)"""

	# Daily yield series indexed by date, used by the daily forecasting models.
	df_daily_gen = gen_data_daily[['DATE_TIME', 'DAILY_YIELD']].set_index('DATE_TIME')

	"""# Testing for stationarity"""

	# Augmented Dickey-Fuller test; index 0 is the statistic, index 1 the p-value.
	result = adfuller(df_daily_gen['DAILY_YIELD'].dropna())
	# Show the results in the app; print() only reaches the server console.
	st.write(f'ADF Statistic: {result[0]}')
	st.write(f'p-value: {result[1]}')

	"""# Splitting the dataset"""

	# Chronological split: first 80% of days for training, last 20% for testing.
	train_gen, test_gen = train_test_split(df_daily_gen, test_size=0.2, shuffle=False)
	# Forecast columns are added to test_gen below; copy it so those writes go
	# to an independent frame rather than to a slice of df_daily_gen.
	test_gen = test_gen.copy()

	"""# ARIMA model"""

	arima_model_gen = ARIMA(train_gen['DAILY_YIELD'], order=(5, 1, 0))
	arima_fit_gen = arima_model_gen.fit()
	forecast_arima_gen = arima_fit_gen.forecast(steps=len(test_gen))
	# The forecast holds exactly len(test_gen) consecutive steps after the
	# training data, so assign by position; label alignment would yield NaN if
	# the forecast index did not match the test index.
	test_gen['Forecast_ARIMA'] = np.asarray(forecast_arima_gen)

	"""# Plot ARIMA Forecast"""

	fig, ax = plt.subplots(figsize=(15, 5))
	train_gen['DAILY_YIELD'].plot(ax=ax, label='Training Data')
	test_gen['DAILY_YIELD'].plot(ax=ax, label='Test Data')
	test_gen['Forecast_ARIMA'].plot(ax=ax, label='ARIMA Forecast')
	ax.legend()
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# SARIMA Model for Seasonal Data"""

	# Seasonal model fitted on the training days, forecast over the test window.
	sarima_model = SARIMAX(train_gen['DAILY_YIELD'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
	sarima_fit = sarima_model.fit(disp=False)
	sarima_forecast = sarima_fit.forecast(steps=len(test_gen))
	# The forecast holds exactly len(test_gen) consecutive steps after the
	# training data, so assign by position; label alignment would yield NaN if
	# the forecast index did not match the test index.
	test_gen['Forecast_SARIMA'] = np.asarray(sarima_forecast)

	"""# Plot SARIMA Forecast"""

	fig, ax = plt.subplots(figsize=(15, 5))
	train_gen['DAILY_YIELD'].plot(ax=ax, label='Train')
	test_gen['DAILY_YIELD'].plot(ax=ax, label='Test')
	test_gen['Forecast_SARIMA'].plot(ax=ax, label='SARIMA Forecast')
	ax.legend()
	ax.set_title('SARIMA Model Forecast for Daily Yield (Generation Data)')
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	"""# SARIMAX vs ARIMA Comparison (Generation Data)"""

	# Actual test values against both forecasts on one axes.
	fig, ax = plt.subplots(figsize=(15, 5))
	ax.plot(test_gen.index, test_gen['DAILY_YIELD'], label='Actual Test Data')
	ax.plot(test_gen.index, test_gen['Forecast_ARIMA'], label='ARIMA Forecast')
	ax.plot(test_gen.index, test_gen['Forecast_SARIMA'], label='SARIMA Forecast')
	ax.legend()
	ax.set_title("ARIMA vs SARIMA Forecast Comparison (Generation Data)")
	# Write the comparison to disk as well.
	fig.savefig('first_plot.png', dpi=300, bbox_inches='tight')
	# plt.show() renders nothing inside a Streamlit app; display the figure
	# explicitly, then release it.
	st.pyplot(fig)
	plt.close(fig)

	"""# ARIMA Model"""

	# Plant-wide DAILY_YIELD per timestamp, last 288 rows, indexed by DATE_TIME.
	# Selecting the column before summing avoids copying and aggregating the
	# whole frame.
	pred_gen = gen_1.groupby('DATE_TIME')[['DAILY_YIELD']].sum().iloc[-288:]
	# A bare `pred_gen.head()` call shows nothing in the app; render it explicitly.
	st.dataframe(pred_gen.head())

	result = adfuller(pred_gen['DAILY_YIELD'])
	# Show the results in the app; print() only reaches the server console.
	st.text('Augmented Dickey-Fuller Test:')
	labels = ['ADF Test Statistic','p-value','#Lags Used','Number of Observations Used']

	# zip stops after the four labelled entries of the result tuple.
	for value, label in zip(result, labels):
	    st.text(label+' : '+str(value) )

	# A p-value at or below 0.05 rejects the unit-root null hypothesis.
	if result[1] <= 0.05:
	    st.text("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
	else:
	    st.text("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")

	# First 192 rows for training, last 96 rows for testing.
	train = pred_gen[:192]
	test = pred_gen[-96:]
	fig, ax = plt.subplots(figsize=(15, 5))
	ax.plot(train, label='Train', color='navy')
	ax.plot(test, label='Test', color='darkorange')
	ax.set_title('Last 4 days of daily yield', fontsize=17)
	ax.legend()
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	# Seasonal auto-ARIMA search on the training window (m=96 steps per season).
	# `suppress_warnings` is the keyword auto_arima defines; the previous
	# misspelling was not recognised as that option.
	arima_model = auto_arima(
	    train,
	    start_p=0, d=1, start_q=0, max_p=4, max_d=4, max_q=4,
	    start_P=0, D=1, start_Q=0, max_P=1, max_D=1, max_Q=1,
	    m=96, seasonal=True,
	    error_action='warn', trace=True, suppress_warnings=True,
	    stepwise=True, random_state=20, n_fits=1,
	)

	# 194 timestamps at 15-minute spacing, starting one step after the last
	# test timestamp.
	future_dates = [test.index[-1] + DateOffset(minutes=x) for x in range(15, 2925, 15)]

	# The model is fitted on `train` only, so its forecast starts right after
	# the training window: the first len(test) steps cover the test window and
	# only the steps after that belong to the future timestamps. Forecast once
	# and split by position; np.asarray drops any index the model attaches so
	# the explicit indexes below are used as given.
	n_test = len(test)
	full_forecast = np.asarray(arima_model.predict(n_periods=n_test + len(future_dates)))

	prediction = pd.DataFrame(full_forecast[:n_test], index=test.index)
	prediction.columns = ['predicted_yield']

	fig, ax = plt.subplots(ncols=2, nrows=1, dpi=100, figsize=(17, 5))
	ax[0].plot(train, label='Train', color='navy')
	ax[0].plot(test, label='Test', color='darkorange')
	ax[0].plot(prediction, label='Prediction', color='green')
	ax[0].legend()
	ax[0].set_title('Forecast on test set', size=17)
	ax[0].set_ylabel('kW', color='navy', fontsize=17)

	f_prediction = pd.DataFrame(full_forecast[n_test:], index=future_dates)
	f_prediction.columns = ['predicted_yield']
	ax[1].plot(pred_gen, label='Original data', color='navy')
	ax[1].plot(f_prediction, label='18th & 19th June', color='green')
	ax[1].legend()
	ax[1].set_title('Next days forecast', size=17)
	# plt.show() renders nothing inside a Streamlit app; display the figure explicitly.
	st.pyplot(fig)

	# A bare `summary()` call shows nothing in the app; render its text.
	st.text(str(arima_model.summary()))