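# NOTE(review): the original text here read "Spaces: Sleeping Sleeping".
# It looks like hosting-page status text picked up when the file was copied,
# not program code; kept as a comment so the module parses.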
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from lightgbm import LGBMRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler


class RULPredictionApp:
    """Streamlit app that predicts Remaining Useful Life (RUL) and plots the data.

    On construction the app loads a trained model, its fitted preprocessor and
    the raw CSV dataset, then derives ``df_Arm500``: the cleaned, numeric
    subset of rows whose ``steel_type`` is ``"Arm500"``. ``run()`` draws the
    Streamlit page (sidebar inputs, prediction, optional plots, data preview).
    """

    # Columns retained from the raw dataset before cleaning.
    _SELECTED_COLUMNS = (
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
        'workpiece_weight, tonn', 'cast_in_row', 'workpiece_slice_geometry',
        'alloy_type', 'steel_weight_theoretical, tonn', 'metal_residue_grab1, tonn',
        'steel_weight, tonn', 'resistance, tonn', 'swing_frequency, amount/minute',
        'crystallizer_movement, mm', 'alloy_speed, meter/minute',
        'water_consumption, liter/minute', 'water_temperature_delta, Celsius deg.',
        'Ce, %', 'C, %', 'Si, %', 'Mn,%', 'S, %', 'P, %', 'Cr, %', 'Ni, %',
        'Cu, %', 'As, %', 'Mo, %', 'Nb, %', 'Ti, %', 'V, %', 'Al, %', 'Ca, %',
        'N, %', 'Pb, %', 'Mg, %', 'Zn, %', 'sleeve', 'num_crystallizer',
        'num_stream', 'RUL',
    )

    # Columns used only for filtering / row validation; removed from the
    # frame returned by ``preprocess_data``.
    _METADATA_COLUMNS = (
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
    )

    def __init__(
        self,
        model_path: str = 'model/best_model.pkl',
        preprocessor_path: str = 'model/preprocessor.pkl',
        dataset_path: str = 'dataset/ccm_rul_dataset.csv',
    ) -> None:
        """Load the model, the preprocessor and the dataset.

        Args:
            model_path: joblib file holding the trained model.
            preprocessor_path: joblib file holding the fitted preprocessor.
            dataset_path: CSV file holding the raw dataset.

        Note:
            ``joblib.load`` unpickles arbitrary objects, so these paths must
            only ever point at trusted files.
        """
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)
        self.df = pd.read_csv(dataset_path)
        self.df_Arm500 = self.preprocess_data()

    def preprocess_data(self) -> pd.DataFrame:
        """Return the cleaned ``Arm500`` subset of ``self.df``.

        ``self.df`` itself is left untouched. Steps: parse the date/time
        columns, keep ``_SELECTED_COLUMNS``, encode the two categorical
        columns as 0/1, patch one known bad ``sleeve`` value, drop rows with
        any missing value, cast ``sleeve`` to int, keep ``steel_type ==
        "Arm500"`` and drop the metadata columns.

        Returns:
            A new DataFrame with the feature columns plus ``RUL``.
        """
        df = self.df.copy()

        # The timestamp columns are dropped at the end, but parsing them here
        # means rows with a missing date/time become NaT and are removed by
        # the dropna below.
        df['sample_time_continuous_caster'] = pd.to_datetime(
            df['date'] + " " + df['sample_time_continuous_caster']
        )
        df['date'] = pd.to_datetime(df['date'])

        # Explicit copy: the column assignments below must not be applied to
        # (or warned about as) a slice of the wider frame.
        df = df[list(self._SELECTED_COLUMNS)].copy()

        # Encode the two-valued categoricals; unmapped values become NaN and
        # are removed by the dropna below.
        df['workpiece_slice_geometry'] = df['workpiece_slice_geometry'].map(
            {'150x150': 1, '180x180': 0}
        )
        df['alloy_type'] = df['alloy_type'].map({'open': 1, 'close': 0})
        df = df.rename(columns={
            'workpiece_slice_geometry': 'workpiece_slice_geometry_150x150',
            'alloy_type': 'alloy_type_open',
        })

        # Patch the one non-numeric sleeve id so the column can be cast.
        # NOTE(review): the replacement does not share the matched value's
        # digits (530314 -> 540314000); confirm this mapping is intended.
        df.loc[df['sleeve'] == '530314 К', 'sleeve'] = '540314000'

        # Drop incomplete rows BEFORE the integer cast: casting a column that
        # still contains NaN raises instead of discarding the bad row.
        df = df.dropna(axis=0).astype({'sleeve': int})

        arm500 = df[df['steel_type'] == "Arm500"]
        return arm500.drop(columns=list(self._METADATA_COLUMNS))

    def _feature_frame(self) -> pd.DataFrame:
        """Return ``df_Arm500`` without the ``RUL`` target column."""
        return self.df_Arm500.drop(columns=['RUL'])

    @staticmethod
    def _show_figure(fig) -> None:
        """Render ``fig`` in Streamlit, then close it.

        Streamlit re-executes the script on every interaction; figures that
        are never closed would accumulate in matplotlib's global registry.
        """
        st.pyplot(fig)
        plt.close(fig)

    def get_input_features(self) -> pd.DataFrame:
        """Draw one sidebar number input per feature and collect the values.

        Each input defaults to the mean of that feature in ``df_Arm500``.

        Returns:
            A single-row DataFrame whose columns are the feature names.
        """
        features = self._feature_frame()
        values = {
            name: st.sidebar.number_input(name, value=float(features[name].mean()))
            for name in features.columns
        }
        return pd.DataFrame([values])

    def predict_rul(self, input_df: pd.DataFrame) -> np.ndarray:
        """Transform ``input_df`` with the preprocessor and predict RUL.

        Args:
            input_df: Feature rows, with the same columns as ``df_Arm500``
                minus ``RUL``.

        Returns:
            Whatever ``self.model.predict`` returns for the transformed rows
            (indexed positionally by the callers in this class).
        """
        # Re-attach the column names to the transformed array.
        # NOTE(review): this assumes the preprocessor keeps the number and
        # order of columns — confirm against how it was fitted.
        transformed = pd.DataFrame(
            self.preprocessor.transform(input_df), columns=input_df.columns
        )
        return self.model.predict(transformed)

    def plot_correlation_matrix(self) -> None:
        """Show a heatmap of the pairwise correlations in ``df_Arm500``."""
        st.header("Correlation Matrix")
        st.write("The correlation matrix shows the correlation coefficients between the variables. It helps to understand the linear relationships between features.")
        corr_matrix = self.df_Arm500.corr()
        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=True, fmt=".2f", ax=ax)
        self._show_figure(fig)

    def plot_feature_importance(self) -> None:
        """Show the model's ``feature_importances_`` as a horizontal bar chart."""
        st.header("Feature Importance")
        st.write("The feature importance plot shows the relative importance of each feature in predicting the RUL. Higher values indicate more important features.")
        feature_importance = pd.DataFrame({
            'feature': self._feature_frame().columns,
            'importance': self.model.feature_importances_,
        }).sort_values(by='importance', ascending=True)
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(feature_importance['feature'], feature_importance['importance'], color='blue')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        ax.set_title('Feature Importance')
        self._show_figure(fig)

    def plot_additional_graphs(self) -> None:
        """Show the RUL histogram and a user-configurable pairplot."""
        st.header("Additional Graphs")

        st.subheader("RUL Distribution")
        st.write("The distribution plot shows the distribution of Remaining Useful Life (RUL) in the dataset.")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(self.df_Arm500['RUL'], kde=True, color='blue', ax=ax)
        ax.set_title("RUL Distribution")
        self._show_figure(fig)

        st.subheader("Pairplot of Selected Features")
        st.write("The pairplot shows the pairwise relationships between selected features. It helps to visualize the distribution and relationships between features.")
        feature_names = list(self._feature_frame().columns)
        selected_features = st.multiselect(
            'Select features for pairplot', feature_names, default=feature_names[:5]
        )
        # A pairplot needs at least two variables; with fewer, nothing is drawn.
        if len(selected_features) > 1:
            grid = sns.pairplot(self.df_Arm500[selected_features])
            # pairplot builds its own figure; render that one explicitly
            # instead of whatever pyplot considers "current".
            self._show_figure(grid.figure)

    def plot_actual_vs_prediction(self) -> None:
        """Scatter actual vs. predicted RUL over all of ``df_Arm500``."""
        st.header("Actual vs Predicted RUL")
        st.write("The scatter plot shows the relationship between actual RUL and predicted RUL. A line is added for reference, where points close to the line indicate accurate predictions.")
        y = self.df_Arm500['RUL']
        y_pred = self.predict_rul(self._feature_frame())
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(y, y_pred, alpha=0.5, color='green')
        ax.set_xlabel("Actual RUL")
        ax.set_ylabel("Predicted RUL")
        ax.set_title("Actual vs Predicted RUL")
        # y = x reference line: a perfect prediction would fall on it.
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
        self._show_figure(fig)

    def run(self) -> None:
        """Draw the whole page: inputs, prediction, optional plots, preview."""
        st.title("RUL Prediction and Data Visualization")

        st.sidebar.header("Input Features")
        input_df = self.get_input_features()

        if st.sidebar.button("Predict RUL"):
            with st.spinner('Predicting...'):
                prediction = self.predict_rul(input_df)
            st.success(f"Predicted RUL: **{prediction[0]:.2f}**")

        if st.sidebar.checkbox("Show Correlation Matrix"):
            self.plot_correlation_matrix()
        if st.sidebar.checkbox("Show Feature Importance"):
            self.plot_feature_importance()
        if st.sidebar.checkbox("Show Additional Graphs"):
            self.plot_additional_graphs()
        if st.sidebar.checkbox("Show Actual vs Predicted RUL"):
            self.plot_actual_vs_prediction()

        st.subheader("Data Preview")
        # NOTE(review): the text below says "first 5 rows", but the full frame
        # is passed to st.dataframe (in a 200px-high scroll pane). Behaviour
        # kept as-is; confirm which of the two is intended.
        st.write("The data preview shows the first 5 rows of the dataset. You can scroll to view more columns.")
        st.dataframe(self.df_Arm500, height=200)
# Script entry point: build the app and draw the page.
if __name__ == "__main__":
    app = RULPredictionApp()
    app.run()