SALMAell committed on
Commit
26c3a8d
·
verified ·
1 Parent(s): d071fd7

"""Streamlit app: linear regression on the California housing dataset.

Fits one model on a single feature (``AveRooms``) and one on every feature,
then shows RMSE / R-squared for each together with diagnostic plots.
"""
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def _rmse_and_r2(y_true, y_hat):
    """Return ``(RMSE, R-squared)`` for predictions *y_hat* against *y_true*."""
    return np.sqrt(mean_squared_error(y_true, y_hat)), r2_score(y_true, y_hat)


def _render(figure):
    """Draw *figure* in the app, then close it.

    Figures created through ``plt.subplots()`` stay registered with pyplot
    until they are closed explicitly, so leaving them open accumulates
    figures for as long as the process lives.
    """
    st.pyplot(figure)
    plt.close(figure)


# Application title
st.title('California Housing Prices Prediction')

# Load the data into a DataFrame and append the target column
california = fetch_california_housing()
df = pd.DataFrame(california.data, columns=california.feature_names)
df['MedHouseVal'] = california.target  # Median house value

# Show the first rows
st.write("## Data Overview")
st.write(df.head())

# Visualise the relationship between rooms and house value
st.write("## Scatter Plot")
fig, ax = plt.subplots()
ax.scatter(df['AveRooms'], df['MedHouseVal'])
ax.set_xlabel('Average number of rooms per dwelling')
ax.set_ylabel('Median house value')
_render(fig)

# --- Simple linear regression: AveRooms is the only predictor ---
X = df[['AveRooms']]
y = df['MedHouseVal']

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create and train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Evaluate the model on the held-out rows
rmse, r2 = _rmse_and_r2(y_test, y_pred)

st.write('## Linear Regression Model Performance')
st.write(f'RMSE: {rmse:.2f}')
st.write(f'R-squared: {r2:.2f}')

# Visualise the fitted regression line over the test points
st.write("## Regression Line")
fig, ax = plt.subplots()
ax.scatter(X_test['AveRooms'], y_test, color='blue')
ax.plot(X_test['AveRooms'], y_pred, color='red')
ax.set_xlabel('Average number of rooms per dwelling')
ax.set_ylabel('Median house value')
_render(fig)

# --- Multiple linear regression: every column except the target ---
X = df.drop('MedHouseVal', axis=1)
y = df['MedHouseVal']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)
y_pred = mlr_model.predict(X_test)

rmse_ml, r2_ml = _rmse_and_r2(y_test, y_pred)

st.write('## Multilinear Regression Model Performance')
st.write(f'RMSE: {rmse_ml:.2f}')
st.write(f'R-squared: {r2_ml:.2f}')

# Predicted vs. actual values for the multi-feature model
st.write("## Multilinear Regression Predictions")
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel('Actual Median House Value')
ax.set_ylabel('Predicted Median House Value')
ax.set_title('Multilinear Regression Model Predictions')
_render(fig)

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py CHANGED
@@ -1 +1,81 @@
1
  !pip install streamlit transformers datasets scikit-learn matplotlib
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: linear regression on the California housing dataset.

Fits one model on a single feature (``AveRooms``) and one on every feature,
then shows RMSE / R-squared for each together with diagnostic plots.
"""
# NOTE: the former first line, `!pip install streamlit transformers datasets
# scikit-learn matplotlib`, is IPython/notebook shell syntax and a SyntaxError
# in a plain .py file. Install the dependencies outside the script instead
# (e.g. list them in a requirements.txt).
import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


def _rmse_and_r2(y_true, y_hat):
    """Return ``(RMSE, R-squared)`` for predictions *y_hat* against *y_true*."""
    return np.sqrt(mean_squared_error(y_true, y_hat)), r2_score(y_true, y_hat)


def _render(figure):
    """Draw *figure* in the app, then close it.

    Figures created through ``plt.subplots()`` stay registered with pyplot
    until they are closed explicitly, so leaving them open accumulates
    figures for as long as the process lives.
    """
    st.pyplot(figure)
    plt.close(figure)


# Application title
st.title('California Housing Prices Prediction')

# Load the data into a DataFrame and append the target column
california = fetch_california_housing()
df = pd.DataFrame(california.data, columns=california.feature_names)
df['MedHouseVal'] = california.target  # Median house value

# Show the first rows
st.write("## Data Overview")
st.write(df.head())

# Visualise the relationship between rooms and house value
st.write("## Scatter Plot")
fig, ax = plt.subplots()
ax.scatter(df['AveRooms'], df['MedHouseVal'])
ax.set_xlabel('Average number of rooms per dwelling')
ax.set_ylabel('Median house value')
_render(fig)

# --- Simple linear regression: AveRooms is the only predictor ---
X = df[['AveRooms']]
y = df['MedHouseVal']

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Create and train the linear regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)
y_pred = lr_model.predict(X_test)

# Evaluate the model on the held-out rows
rmse, r2 = _rmse_and_r2(y_test, y_pred)

st.write('## Linear Regression Model Performance')
st.write(f'RMSE: {rmse:.2f}')
st.write(f'R-squared: {r2:.2f}')

# Visualise the fitted regression line over the test points
st.write("## Regression Line")
fig, ax = plt.subplots()
ax.scatter(X_test['AveRooms'], y_test, color='blue')
ax.plot(X_test['AveRooms'], y_pred, color='red')
ax.set_xlabel('Average number of rooms per dwelling')
ax.set_ylabel('Median house value')
_render(fig)

# --- Multiple linear regression: every column except the target ---
X = df.drop('MedHouseVal', axis=1)
y = df['MedHouseVal']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
mlr_model = LinearRegression()
mlr_model.fit(X_train, y_train)
y_pred = mlr_model.predict(X_test)

rmse_ml, r2_ml = _rmse_and_r2(y_test, y_pred)

st.write('## Multilinear Regression Model Performance')
st.write(f'RMSE: {rmse_ml:.2f}')
st.write(f'R-squared: {r2_ml:.2f}')

# Predicted vs. actual values for the multi-feature model
st.write("## Multilinear Regression Predictions")
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_xlabel('Actual Median House Value')
ax.set_ylabel('Predicted Median House Value')
ax.set_title('Multilinear Regression Model Predictions')
_render(fig)