Spaces:

CodingMaster24
/

SolarAnalysis

Sleeping

App Files Files Community

CodingMaster24 commited on Mar 7

Commit

46f5562

verified ·

1 Parent(s): b90beb9

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -448

app.py DELETED Viewed

@@ -1,448 +0,0 @@
-import numpy as np
-import pandas as pd
-import matplotlib.pyplot as plt
-from statsmodels.tsa.arima.model import ARIMA
-from statsmodels.tsa.stattools import adfuller
-from statsmodels.tsa.statespace.sarimax import SARIMAX
-from sklearn.model_selection import train_test_split
-import matplotlib.image as mpimg
-import seaborn as sns
-import warnings
-import datetime as dt
-from sklearn.metrics import confusion_matrix
-import matplotlib.dates as mdates
-from pandas.tseries.offsets import DateOffset
-import streamlit as st
-# from pmdarima.arima import auto_arima
-from statsmodels.tsa.stattools import adfuller
-warnings.filterwarnings('ignore')
-"""# Load Generation Data (Plant 1)"""
-from sklearn.model_selection import train_test_split
-from pmdarima.arima import auto_arima
-import warnings
-warnings.filterwarnings('ignore')
-st.title("Solar Plant Data Analysis and Forecasting")
-# File Upload
-uploaded_gen = st.file_uploader("Upload Generation Data CSV", type=["csv"], key="gen")
-uploaded_weather = st.file_uploader("Upload Weather Sensor Data CSV", type=["csv"], key="weather")
-def load_data(file):
-    if file is not None:
-        return pd.read_csv(file)
-    return None
-# Load Data
-gen_data = load_data(uploaded_gen)
-weather_data = load_data(uploaded_weather)
-default_gen_data = pd.read_csv('Plant_1_Generation_Data.csv')
-default_weather_data = pd.read_csv('Plant_1_Weather_Sensor_Data.csv')
-if gen_data is None:
-    gen_data = default_gen_data
-    gen_1 = default_gen_data
-if weather_data is None:
-    weather_data = default_weather_data
-    sens_1 = default_weather_data
-# Data Preview
-st.subheader("Generation Data Preview")
-st.dataframe(gen_data.head())
-st.subheader("Weather Data Preview")
-st.dataframe(weather_data.head())
-st.subheader("Generation Data Preview")
-st.dataframe(gen_data.tail())
-st.subheader("Weather Data Preview")
-st.dataframe(weather_data.tail())
-st.subheader("Generation Data Preview")
-st.dataframe(gen_data.describe())
-st.subheader("Weather Data Preview")
-st.dataframe(weather_data.describe())
-# Filter out non-numeric columns
-numeric_data = gen_1.select_dtypes(include=['float64', 'int64'])
-# Calculate the correlation matrix on the numeric data
-corelation = numeric_data.corr()
-# Plot the heatmap
-fig, ax = plt.subplots(figsize=(14, 12))
-sns.heatmap(corelation, annot=True, ax=ax)
-st.pyplot(fig)
-st.dataframe(sens_1.tail())
-st.dataframe(sens_1.describe())
-# Filter out non-numeric columns
-numeric_data = sens_1.select_dtypes(include=['float64', 'int64'])
-# Calculate the correlation matrix on the numeric data
-corelation = numeric_data.corr()
-# Plot the heatmap
-fig, ax = plt.subplots(figsize=(14, 12))
-sns.heatmap(corelation, annot=True, ax=ax)
-st.pyplot(fig)
-"""# Format 'DATE_TIME' column to datetime"""
-gen_data['DATE_TIME'] = pd.to_datetime(gen_data['DATE_TIME'], format='%d-%m-%Y %H:%M')
-weather_data['DATE_TIME'] = pd.to_datetime(weather_data['DATE_TIME'], format='%Y-%m-%d %H:%M:%S')
-gen_1['DATE_TIME']= pd.to_datetime(gen_1['DATE_TIME'],format='%d-%m-%Y %H:%M')
-sens_1['DATE_TIME']= pd.to_datetime(sens_1['DATE_TIME'],format='%Y-%m-%d %H:%M:%S')
-"""# Daily Yield & AC/DC Power from Generation Data"""
-gen_data_daily = gen_data.set_index('DATE_TIME').resample('D').sum().reset_index()
-"""# Plot Daily Yield and AC/DC Power"""
-df_gen = gen_1.groupby('DATE_TIME').sum().reset_index()
-df_gen['time'] = df_gen['DATE_TIME'].dt.time
-# Create figure and axes
-fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
-# Daily yield plot
-df_gen.plot(x='DATE_TIME', y='DAILY_YIELD', color='navy', ax=ax[0])
-ax[0].set_title('Daily yield')
-ax[0].set_ylabel('kW', color='navy', fontsize=17)
-# AC & DC power plot
-df_gen.set_index('time').drop('DATE_TIME', axis=1)[['AC_POWER', 'DC_POWER']].plot(style='o', ax=ax[1])
-ax[1].set_title('AC power & DC power during day hours')
-# Display in Streamlit
-st.pyplot(fig)
-# Create another figure for additional plots
-fig2, ax2 = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
-# Daily and Total Yield plot
-gen_data.plot(x='DATE_TIME', y=['DAILY_YIELD', 'TOTAL_YIELD'], ax=ax2[0], title="Daily and Total Yield (Generation Data)")
-# AC Power & DC Power plot
-gen_data.plot(x='DATE_TIME', y=['AC_POWER', 'DC_POWER'], ax=ax2[1], title="AC Power & DC Power (Generation Data)")
-# Display the second figure in Streamlit
-st.pyplot(fig2)
-# Create a copy and extract the date
-daily_gen = df_gen.copy()
-daily_gen['date'] = daily_gen['DATE_TIME'].dt.date
-# Group by 'date' and sum only the numerical columns
-daily_gen = daily_gen.groupby('date').sum(numeric_only=True)
-# Plot the daily and total yield
-fig, ax = plt.subplots(ncols=2, dpi=100, figsize=(20, 5))
-daily_gen['DAILY_YIELD'].plot(ax=ax[0], color='navy')
-daily_gen['TOTAL_YIELD'].plot(kind='bar', ax=ax[1], color='navy')
-fig.autofmt_xdate(rotation=45)
-ax[0].set_title('Daily Yield')
-ax[1].set_title('Total Yield')
-ax[0].set_ylabel('kW', color='navy', fontsize=17)
-plt.show()
-# Group by 'DATE_TIME' and sum
-df_sens = sens_1.groupby('DATE_TIME').sum().reset_index()
-df_sens['time'] = df_sens['DATE_TIME'].dt.time
-# Plotting
-fig, ax = plt.subplots(ncols=2, nrows=1, dpi=100, figsize=(20, 5))
-# Irradiation plot
-df_sens.plot(x='time', y='IRRADIATION', ax=ax[0], style='o')
-# Ambient and Module Temperature plot
-df_sens.set_index('DATE_TIME').drop('time', axis=1)[['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE']].plot(ax=ax[1])
-# Setting titles and labels
-ax[0].set_title('Irradiation during day hours')
-ax[1].set_title('Ambient and Module Temperature')
-ax[0].set_ylabel('W/m²', color='navy', fontsize=17)
-ax[1].set_ylabel('°C', color='navy', fontsize=17)
-plt.show()
-"""# % of DC power converted to AC power"""
-# Create a copy of the data
-loss = gen_1.copy()
-# Create a new 'day' column containing only the date part from 'DATE_TIME'
-loss['day'] = loss['DATE_TIME'].dt.date
-# Drop the 'DATE_TIME' column to prevent summing over datetime values
-loss = loss.drop(columns=['DATE_TIME'])
-# Group by 'day' and sum only numeric columns
-loss = loss.groupby('day').sum()
-# Calculate the percentage of DC power converted to AC power
-loss['losses'] = (loss['AC_POWER'] / loss['DC_POWER']) * 100
-# Plot the losses
-loss['losses'].plot(style='o--', figsize=(17, 5), label='Real Power')
-# Plot styling
-plt.title('% of DC power converted to AC power', size=17)
-plt.ylabel('DC power converted (%)', fontsize=14, color='red')
-plt.axhline(loss['losses'].mean(), linestyle='--', color='gray', label='mean')
-plt.legend()
-plt.show()
-"""# DC Power"""
-sources=gen_1.copy()
-sources['time']=sources['DATE_TIME'].dt.time
-sources.set_index('time').groupby('SOURCE_KEY')['DC_POWER'].plot(style='o',legend=True,figsize=(20,10))
-plt.title('DC Power during day for all sources',size=17)
-plt.ylabel('DC POWER ( kW )',color='navy',fontsize=17)
-plt.show()
-"""# DC POWER ( kW )"""
-dc_gen=gen_1.copy()
-dc_gen['time']=dc_gen['DATE_TIME'].dt.time
-dc_gen=dc_gen.groupby(['time','SOURCE_KEY'])['DC_POWER'].mean().unstack()
-cmap = sns.color_palette("Spectral", n_colors=12)
-fig,ax=plt.subplots(ncols=2,nrows=1,dpi=100,figsize=(20,6))
-dc_gen.iloc[:,0:11].plot(ax=ax[0],color=cmap)
-dc_gen.iloc[:,11:22].plot(ax=ax[1],color=cmap)
-ax[0].set_title('First 11 sources')
-ax[0].set_ylabel('DC POWER ( kW )',fontsize=17,color='navy')
-ax[1].set_title('Last 11 sources')
-plt.show()
-"""# Irradiation, Ambient and Module Temperature from Weather Data"""
-fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))
-weather_data.plot(x='DATE_TIME', y='IRRADIATION', ax=ax[0], title="Irradiation (Weather Data)")
-weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax[1], title="Ambient & Module Temperature (Weather Data)")
-plt.show()
-"""# Real DC power converted (DC Power efficiency)"""
-gen_data['DC_POWER_CONVERTED'] = gen_data['DC_POWER'] * 0.98  # Assume 2% loss in conversion
-fig, ax = plt.subplots(figsize=(15, 5))
-gen_data.plot(x='DATE_TIME', y='DC_POWER_CONVERTED', ax=ax, title="DC Power Converted")
-plt.show()
-"""# DC Power generated during day hours (Generation Data)"""
-day_data_gen = gen_data[(gen_data['DATE_TIME'].dt.hour >= 6) & (gen_data['DATE_TIME'].dt.hour <= 18)]
-fig, ax = plt.subplots(figsize=(15, 5))
-day_data_gen.plot(x='DATE_TIME', y='DC_POWER', ax=ax, title="DC Power Generated During Day Hours")
-plt.show()
-"""# DC Power And Daily Yield"""
-temp1_gen=gen_1.copy()
-temp1_gen['time']=temp1_gen['DATE_TIME'].dt.time
-temp1_gen['day']=temp1_gen['DATE_TIME'].dt.date
-temp1_sens=sens_1.copy()
-temp1_sens['time']=temp1_sens['DATE_TIME'].dt.time
-temp1_sens['day']=temp1_sens['DATE_TIME'].dt.date
-# just for columns
-cols=temp1_gen.groupby(['time','day'])['DC_POWER'].mean().unstack()
-ax =temp1_gen.groupby(['time','day'])['DC_POWER'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30))
-temp1_gen.groupby(['time','day'])['DAILY_YIELD'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,20),style='-.',ax=ax)
-i=0
-for a in range(len(ax)):
-    for b in range(len(ax[a])):
-        ax[a,b].set_title(cols.columns[i],size=15)
-        ax[a,b].legend(['DC_POWER','DAILY_YIELD'])
-        i=i+1
-plt.tight_layout()
-plt.show()
-"""# Module Temperature And Ambient Temperature"""
-ax= temp1_sens.groupby(['time','day'])['MODULE_TEMPERATURE'].mean().unstack().plot(subplots=True,layout=(17,2),figsize=(20,30))
-temp1_sens.groupby(['time','day'])['AMBIENT_TEMPERATURE'].mean().unstack().plot(subplots=True,layout=(17,2),figsize=(20,40),style='-.',ax=ax)
-i=0
-for a in range(len(ax)):
-    for b in range(len(ax[a])):
-        ax[a,b].axhline(50)
-        ax[a,b].set_title(cols.columns[i],size=15)
-        ax[a,b].legend(['Module Temperature','Ambient Temperature'])
-        i=i+1
-plt.tight_layout()
-plt.show()
-"""# DC_POWER And DAILY_YIELD"""
-worst_source=gen_1[gen_1['SOURCE_KEY']=='bvBOhCH3iADSZry']
-worst_source['time']=worst_source['DATE_TIME'].dt.time
-worst_source['day']=worst_source['DATE_TIME'].dt.date
-ax=worst_source.groupby(['time','day'])['DC_POWER'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30))
-worst_source.groupby(['time','day'])['DAILY_YIELD'].mean().unstack().plot(sharex=True,subplots=True,layout=(17,2),figsize=(20,30),ax=ax,style='-.')
-i=0
-for a in range(len(ax)):
-    for b in range(len(ax[a])):
-        ax[a,b].set_title(cols.columns[i],size=15)
-        ax[a,b].legend(['DC_POWER','DAILY_YIELD'])
-        i=i+1
-plt.tight_layout()
-plt.show()
-"""# Inverter Analysis (Generation Data)"""
-inverter_performance = gen_data.groupby('SOURCE_KEY')['DC_POWER'].mean().sort_values()
-print(f"Underperforming inverter: {inverter_performance.idxmin()}")
-"""# Module temperature and Ambient Temperature on PLANT_1 (Weather Data)"""
-fig, ax = plt.subplots(figsize=(15, 5))
-weather_data.plot(x='DATE_TIME', y=['AMBIENT_TEMPERATURE', 'MODULE_TEMPERATURE'], ax=ax, title="Module and Ambient Temperature (Weather Data)")
-plt.show()
-"""# Inverter in action (Generation Data)"""
-inverter_data = gen_data[gen_data['SOURCE_KEY'] == 'bvBOhCH3iADSZry']
-fig, ax = plt.subplots(figsize=(15, 5))
-inverter_data.plot(x='DATE_TIME', y=['AC_POWER', 'DC_POWER'], ax=ax, title="Inverter bvBOhCH3iADSZry")
-plt.show()
-"""# Forecasting with ARIMA (Generation Data)"""
-df_daily_gen = gen_data_daily[['DATE_TIME', 'DAILY_YIELD']].set_index('DATE_TIME')
-"""# Testing for stationarity"""
-result = adfuller(df_daily_gen['DAILY_YIELD'].dropna())
-print(f'ADF Statistic: {result[0]}')
-print(f'p-value: {result[1]}')
-"""# Splitting the dataset"""
-train_gen, test_gen = train_test_split(df_daily_gen, test_size=0.2, shuffle=False)
-"""# ARIMA model"""
-arima_model_gen = ARIMA(train_gen['DAILY_YIELD'], order=(5, 1, 0))
-arima_fit_gen = arima_model_gen.fit()
-forecast_arima_gen = arima_fit_gen.forecast(steps=len(test_gen))
-test_gen['Forecast_ARIMA'] = forecast_arima_gen
-"""# Plot ARIMA Forecast"""
-fig, ax = plt.subplots(figsize=(15, 5))
-train_gen['DAILY_YIELD'].plot(ax=ax, label='Training Data')
-test_gen['DAILY_YIELD'].plot(ax=ax, label='Test Data')
-test_gen['Forecast_ARIMA'].plot(ax=ax, label='ARIMA Forecast')
-plt.legend()
-plt.show()
-"""# SARIMA Model for Seasonal Data"""
-sarima_model = SARIMAX(train_gen['DAILY_YIELD'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
-sarima_fit = sarima_model.fit(disp=False)
-sarima_forecast = sarima_fit.forecast(steps=len(test_gen))
-test_gen['Forecast_SARIMA'] = sarima_forecast
-"""# Plot SARIMA Forecast"""
-plt.figure(figsize=(15, 5))
-train_gen['DAILY_YIELD'].plot(label='Train')
-test_gen['DAILY_YIELD'].plot(label='Test')
-test_gen['Forecast_SARIMA'].plot(label='SARIMA Forecast')
-plt.legend()
-plt.title('SARIMA Model Forecast for Daily Yield (Generation Data)')
-plt.show()
-"""# SARIMAX vs ARIMA Comparison (Generation Data)"""
-plt.figure(figsize=(15, 5))
-plt.plot(test_gen.index, test_gen['DAILY_YIELD'], label='Actual Test Data')
-plt.plot(test_gen.index, test_gen['Forecast_ARIMA'], label='ARIMA Forecast')
-plt.plot(test_gen.index, test_gen['Forecast_SARIMA'], label='SARIMA Forecast')
-plt.legend()
-plt.title("ARIMA vs SARIMA Forecast Comparison (Generation Data)")
-plt.savefig('first_plot.png', dpi=300, bbox_inches='tight')
-plt.show()
-plt.close()
-"""# ARIMA Model"""
-pred_gen=gen_1.copy()
-pred_gen=pred_gen.groupby('DATE_TIME').sum()
-pred_gen=pred_gen['DAILY_YIELD'][-288:].reset_index()
-pred_gen.set_index('DATE_TIME',inplace=True)
-pred_gen.head()
-result = adfuller(pred_gen['DAILY_YIELD'])
-print('Augmented Dickey-Fuller Test:')
-labels = ['ADF Test Statistic','p-value','#Lags Used','Number of Observations Used']
-for value,label in zip(result,labels):
-    print(label+' : '+str(value) )
-if result[1] <= 0.05:
-    print("strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root and is stationary")
-else:
-    print("weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
-train=pred_gen[:192]
-test=pred_gen[-96:]
-plt.figure(figsize=(15,5))
-plt.plot(train,label='Train',color='navy')
-plt.plot(test,label='Test',color='darkorange')
-plt.title('Last 4 days of daily yield',fontsize=17)
-plt.legend()
-plt.show()
-arima_model = auto_arima(train,start_p=0,d=1,start_q=0,max_p=4,max_d=4,max_q=4,start_P=0,D=1,start_Q=0,max_P=1,max_D=1,max_Q=1,m=96,seasonal=True,error_action='warn',trace=True,supress_warning=True,stepwise=True,random_state=20,n_fits=1)
-future_dates = [test.index[-1] + DateOffset(minutes=x) for x in range(0,2910,15) ]
-prediction=pd.DataFrame(arima_model.predict(n_periods=96),index=test.index)
-prediction.columns=['predicted_yield']
-fig,ax= plt.subplots(ncols=2,nrows=1,dpi=100,figsize=(17,5))
-ax[0].plot(train,label='Train',color='navy')
-ax[0].plot(test,label='Test',color='darkorange')
-ax[0].plot(prediction,label='Prediction',color='green')
-ax[0].legend()
-ax[0].set_title('Forecast on test set',size=17)
-ax[0].set_ylabel('kW',color='navy',fontsize=17)
-f_prediction=pd.DataFrame(arima_model.predict(n_periods=194),index=future_dates)
-f_prediction.columns=['predicted_yield']
-ax[1].plot(pred_gen,label='Original data',color='navy')
-ax[1].plot(f_prediction,label='18th & 19th June',color='green')
-ax[1].legend()
-ax[1].set_title('Next days forecast',size=17)
-plt.show()
-arima_model.summary()