File size: 8,125 Bytes
4b132af
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import RobustScaler
from lightgbm import LGBMRegressor

class RULPredictionApp:
    """Streamlit app that predicts Remaining Useful Life (RUL) and plots the data.

    On construction the app loads a fitted model, a fitted preprocessor and
    the raw CSV dataset, then derives ``df_Arm500``: the cleaned, all-numeric
    subset of rows whose ``steel_type`` is ``"Arm500"``. Every visualization
    and the sidebar input defaults are based on that subset.

    Attributes:
        model: Estimator loaded with ``joblib``; must expose ``predict`` and
            ``feature_importances_``.
        preprocessor: Fitted transformer loaded with ``joblib``; must expose
            ``transform``.
        df: Raw dataset exactly as read from the CSV file.
        df_Arm500: Output of :meth:`preprocess_data`.
    """

    # Columns kept from the raw CSV, in the order used downstream.
    RELEVANT_COLUMNS = [
        'date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement',
        'workpiece_weight, tonn', 'cast_in_row', 'workpiece_slice_geometry',
        'alloy_type', 'steel_weight_theoretical, tonn', 'metal_residue_grab1, tonn',
        'steel_weight, tonn', 'resistance, tonn', 'swing_frequency, amount/minute',
        'crystallizer_movement, mm', 'alloy_speed, meter/minute',
        'water_consumption, liter/minute', 'water_temperature_delta, Celsius deg.',
        'Ce, %', 'C, %', 'Si, %', 'Mn,%', 'S, %', 'P, %', 'Cr, %', 'Ni, %',
        'Cu, %', 'As, %', 'Mo, %', 'Nb, %', 'Ti, %', 'V, %', 'Al, %', 'Ca, %',
        'N, %', 'Pb, %', 'Mg, %', 'Zn, %', 'sleeve', 'num_crystallizer', 'num_stream', 'RUL',
    ]

    # Non-feature columns removed from the final Arm500 frame.
    _NON_FEATURE_COLUMNS = ['date', 'sample_time_continuous_caster', 'steel_type', 'doc_requirement']

    def __init__(self, model_path='model/best_model.pkl',
                 preprocessor_path='model/preprocessor.pkl',
                 data_path='dataset/ccm_rul_dataset.csv'):
        """Load the model, the preprocessor and the dataset, then clean the data.

        Args:
            model_path: Path of the joblib-serialized model.
            preprocessor_path: Path of the joblib-serialized preprocessor.
            data_path: Path of the CSV dataset.
        """
        # NOTE: joblib.load unpickles its input; only point these paths at
        # trusted files.
        self.model = joblib.load(model_path)
        self.preprocessor = joblib.load(preprocessor_path)
        self.df = pd.read_csv(data_path)
        self.df_Arm500 = self.preprocess_data()

    def preprocess_data(self) -> pd.DataFrame:
        """Build the cleaned "Arm500" frame from a copy of ``self.df``.

        Steps: parse the date/time columns, keep ``RELEVANT_COLUMNS``, encode
        the two categorical columns as 0/1 (renaming them accordingly), patch
        one known malformed ``sleeve`` value, drop rows with any missing value,
        cast ``sleeve`` to ``int``, keep only ``steel_type == "Arm500"`` rows
        and finally drop the non-feature columns.

        Returns:
            A new DataFrame with the feature columns plus ``RUL``; ``self.df``
            is left unmodified.
        """
        df = self.df.copy()

        # The caster sample time is stored as a time of day; prefix it with
        # the date so it parses into a full timestamp.
        df['sample_time_continuous_caster'] = pd.to_datetime(
            df['date'] + " " + df['sample_time_continuous_caster']
        )
        df['date'] = pd.to_datetime(df['date'])

        # Explicit copy so the assignments below never hit a view of ``df``.
        df = df[self.RELEVANT_COLUMNS].copy()

        # Encode categorical columns; values outside the mapping become NaN
        # and are removed by the dropna below.
        df['workpiece_slice_geometry'] = df['workpiece_slice_geometry'].map({'150x150': 1, '180x180': 0})
        df['alloy_type'] = df['alloy_type'].map({'open': 1, 'close': 0})
        df = df.rename(columns={
            'workpiece_slice_geometry': 'workpiece_slice_geometry_150x150',
            'alloy_type': 'alloy_type_open',
        })

        # Correct the one known non-numeric 'sleeve' value.
        df.loc[df['sleeve'] == '530314 К', 'sleeve'] = '540314000'

        # Drop incomplete rows BEFORE the integer cast: casting a column that
        # still contains missing values to int raises.
        df = df.dropna(axis=0)
        df['sleeve'] = df['sleeve'].astype(int)

        arm500 = df[df['steel_type'] == "Arm500"]
        return arm500.drop(columns=self._NON_FEATURE_COLUMNS)

    def _feature_frame(self) -> pd.DataFrame:
        """Return ``df_Arm500`` without the ``RUL`` target column."""
        return self.df_Arm500.drop(columns=['RUL'])

    def _transform(self, features: pd.DataFrame) -> pd.DataFrame:
        """Run ``features`` through the preprocessor, keeping its column names."""
        return pd.DataFrame(self.preprocessor.transform(features), columns=features.columns)

    @staticmethod
    def _show_figure(fig):
        """Render ``fig`` in the Streamlit page, then release it.

        Passing the figure explicitly avoids relying on pyplot's global
        "current figure"; closing it keeps figures from piling up across
        script reruns.
        """
        st.pyplot(fig)
        plt.close(fig)

    def get_input_features(self) -> pd.DataFrame:
        """Create one sidebar numeric input per feature.

        Each input defaults to the mean of that feature in ``df_Arm500``.

        Returns:
            A single-row DataFrame holding the values currently entered.
        """
        x = self._feature_frame()
        input_features = {
            feature: st.sidebar.number_input(feature, value=float(x[feature].mean()))
            for feature in x.columns
        }
        return pd.DataFrame([input_features])

    def predict_rul(self, input_df):
        """Preprocess ``input_df`` and return the model's predictions for it.

        Args:
            input_df: DataFrame with the same feature columns as the one
                returned by :meth:`get_input_features`.

        Returns:
            Whatever ``self.model.predict`` returns (one value per input row).
        """
        return self.model.predict(self._transform(input_df))

    def plot_correlation_matrix(self):
        """Show an annotated heatmap of the correlations within ``df_Arm500``."""
        st.header("Correlation Matrix")
        st.write("The correlation matrix shows the correlation coefficients between the variables. It helps to understand the linear relationships between features.")

        corr_matrix = self.df_Arm500.corr()
        fig, ax = plt.subplots(figsize=(15, 10))
        sns.heatmap(corr_matrix, cmap='coolwarm', center=0, annot=True, fmt=".2f", ax=ax)
        self._show_figure(fig)

    def plot_feature_importance(self):
        """Show the model's feature importances as a horizontal bar chart."""
        st.header("Feature Importance")
        st.write("The feature importance plot shows the relative importance of each feature in predicting the RUL. Higher values indicate more important features.")

        x = self._feature_frame()
        # Ascending sort puts the most important feature at the top of a
        # horizontal bar chart.
        feature_importance = pd.DataFrame({
            'feature': x.columns,
            'importance': self.model.feature_importances_
        }).sort_values(by='importance', ascending=True)

        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(feature_importance['feature'], feature_importance['importance'], color='blue')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Feature')
        ax.set_title('Feature Importance')
        self._show_figure(fig)

    def plot_additional_graphs(self):
        """Show the RUL histogram and a pairplot of user-selected features."""
        st.header("Additional Graphs")

        # RUL distribution
        st.subheader("RUL Distribution")
        st.write("The distribution plot shows the distribution of Remaining Useful Life (RUL) in the dataset.")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.histplot(self.df_Arm500['RUL'], kde=True, color='blue', ax=ax)
        ax.set_title("RUL Distribution")
        self._show_figure(fig)

        # Pairplot of selected features
        st.subheader("Pairplot of Selected Features")
        st.write("The pairplot shows the pairwise relationships between selected features. It helps to visualize the distribution and relationships between features.")
        feature_names = list(self._feature_frame().columns)
        selected_features = st.multiselect('Select features for pairplot', feature_names, default=feature_names[:5])
        # Only drawn when at least two features are selected.
        if len(selected_features) > 1:
            # pairplot builds its own figure; render that one explicitly.
            grid = sns.pairplot(self.df_Arm500[selected_features])
            self._show_figure(grid.figure)

    def plot_actual_vs_prediction(self):
        """Scatter actual vs. predicted RUL for every row of ``df_Arm500``."""
        st.header("Actual vs Predicted RUL")
        st.write("The scatter plot shows the relationship between actual RUL and predicted RUL. A line is added for reference, where points close to the line indicate accurate predictions.")

        y = self.df_Arm500['RUL']
        y_pred = self.model.predict(self._transform(self._feature_frame()))

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(y, y_pred, alpha=0.5, color='green')
        ax.set_xlabel("Actual RUL")
        ax.set_ylabel("Predicted RUL")
        ax.set_title("Actual vs Predicted RUL")
        # Diagonal reference line: predicted == actual.
        ax.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
        self._show_figure(fig)

    def run(self):
        """Render the page: sidebar inputs, prediction, optional plots, preview."""
        st.title("RUL Prediction and Data Visualization")

        # Sidebar for user input
        st.sidebar.header("Input Features")
        input_df = self.get_input_features()

        # Predict on demand, showing a spinner while the model runs
        if st.sidebar.button("Predict RUL"):
            with st.spinner('Predicting...'):
                prediction = self.predict_rul(input_df)
            st.success(f"Predicted RUL: **{prediction[0]:.2f}**")

        # Checkboxes for visualizations
        if st.sidebar.checkbox("Show Correlation Matrix"):
            self.plot_correlation_matrix()
        if st.sidebar.checkbox("Show Feature Importance"):
            self.plot_feature_importance()
        if st.sidebar.checkbox("Show Additional Graphs"):
            self.plot_additional_graphs()
        if st.sidebar.checkbox("Show Actual vs Predicted RUL"):
            self.plot_actual_vs_prediction()

        # Data preview: only the first 5 rows, as the description states.
        st.subheader("Data Preview")
        st.write("The data preview shows the first 5 rows of the dataset. You can scroll to view more columns.")
        st.dataframe(self.df_Arm500.head(), height=200)


if __name__ == "__main__":
    # Script entry point: build the app (loads model, preprocessor and data)
    # and render the page.
    RULPredictionApp().run()