Spaces:
Runtime error
Runtime error
Azrieldr
committed on
Commit
•
e5258c2
1
Parent(s):
dbb80e9
main commit
Browse files- Prediction.py +130 -0
- app.py +10 -0
- cat_cols.json +1 -0
- dt_model.pkl +3 -0
- eda.py +101 -0
- image.png +0 -0
- le.pkl +3 -0
- logreg_model.pkl +3 -0
- num_cols.json +1 -0
- preprocessor.pkl +3 -0
- requirements.txt +8 -0
- rf_model.pkl +3 -0
Prediction.py
ADDED
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pickle
|
3 |
+
import json
|
4 |
+
import pandas as pd
|
5 |
+
# from sklearn.pipeline import make_pipeline
|
6 |
+
# from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
7 |
+
# from sklearn.svm import SVC
|
8 |
+
# from sklearn.linear_model import LogisticRegression
|
9 |
+
# from sklearn.tree import DecisionTreeClassifier
|
10 |
+
# from sklearn.ensemble import RandomForestClassifier
|
11 |
+
|
12 |
+
|
13 |
+
def _load_pickle(path, required=True):
    """Unpickle and return the object stored at ``path``.

    Args:
        path: Location of the pickle file, relative to the working directory.
        required: When false, a missing file yields ``None`` instead of an
            exception so the caller can degrade gracefully.

    Returns:
        The unpickled object, or ``None`` for a missing optional file.

    Raises:
        FileNotFoundError: If the file is missing and ``required`` is true.
    """
    try:
        with open(path, 'rb') as f:
            # SECURITY: unpickling executes arbitrary code, so these paths
            # must only ever hold artifacts produced by the project itself.
            return pickle.load(f)
    except FileNotFoundError:
        if required:
            raise
        return None


def _load_json(path):
    """Parse and return the JSON document stored at ``path``."""
    with open(path, 'r') as f:
        return json.load(f)


# Classifier pipelines are optional: a missing file disables that one model
# instead of crashing the whole app at import time.
# NOTE(review): svc_model.pkl does not appear among the files shipped with
# this module -- confirm it is deployed, otherwise the SVC result is skipped.
pipesvc = _load_pickle('svc_model.pkl', required=False)
pipeLR = _load_pickle('logreg_model.pkl', required=False)
pipeDT = _load_pickle('dt_model.pkl', required=False)
pipeRF = _load_pickle('rf_model.pkl', required=False)

# Still mandatory, exactly as before: without the label encoder no prediction
# can be turned back into a readable class.
preprocessor = _load_pickle('preprocessor.pkl')
Le = _load_pickle('le.pkl')

numerical_cols = _load_json('num_cols.json')
categorical_cols = _load_json('cat_cols.json')

# (feature column, question shown to the user), in the order the widgets are
# rendered. The trailing spaces in 'FATIGUE ' and 'ALLERGY ' are intentional:
# they match the column names listed in num_cols.json.
_SYMPTOM_QUESTIONS = (
    ('SMOKING', 'Apakah merokok?'),
    ('YELLOW_FINGERS', 'Apakah memiliki Yellow Finger?'),
    ('ANXIETY', 'Apakah memiliki Anxeity?'),
    ('PEER_PRESSURE', 'Apakah terdapat peer pressure?'),
    ('CHRONIC DISEASE', 'Apakah memiliki penyakit Kronis?'),
    ('FATIGUE ', 'Apakah mudah capai?'),
    ('ALLERGY ', 'Apakah memiliki alergi?'),
    ('WHEEZING', 'Apakah mengidap mengi?'),
    ('ALCOHOL CONSUMING', 'Apakah mengkonsumsi alkohol?'),
    ('COUGHING', 'Apakah ada batuk?'),
    ('SHORTNESS OF BREATH', 'Apakah terdapat sesak?'),
    ('SWALLOWING DIFFICULTY', 'Apakah susah untuk menalan?'),
    ('CHEST PAIN', 'Apakah terdapat sakit di bagian dada?'),
)


def _yes_no(label):
    """Render a Ya/Tidak radio button and return 2 for 'Ya', 1 otherwise."""
    return 2 if st.radio(label, ('Ya', 'Tidak')) == 'Ya' else 1


def run():
    """Render the prediction form and show every available model's result.

    The form collects gender, age and thirteen yes/no answers. On submit the
    answers become a one-row DataFrame that is passed to each loaded
    classifier pipeline; predictions are decoded with the label encoder
    before being written to the page. A classifier whose pickle file was
    not found is reported with a warning instead of aborting the page.
    """
    with st.form(key='form_prediksi'):
        # The name field is displayed but its value is not a model feature.
        st.text_input('Nama', value='')
        sex = st.radio('Kelamin', ('Perempuan', 'Laki-Laki'))
        age = st.number_input('Umur', min_value=16, max_value=80, value=50, step=1)

        data_inf = {
            'GENDER': 'M' if sex == 'Laki-Laki' else 'F',
            'AGE': age,
        }
        for column, question in _SYMPTOM_QUESTIONS:
            data_inf[column] = _yes_no(question)

        submitted = st.form_submit_button('Predict')

    if submitted:
        features = pd.DataFrame([data_inf])
        results = (
            ('# hasil inf dari Randomforest', pipeRF, 'rf_model.pkl'),
            ('# hasil inf dari Decision Tree', pipeDT, 'dt_model.pkl'),
            ('# hasil inf dari SVC', pipesvc, 'svc_model.pkl'),
            ('# hasil inf dari Logistic regression', pipeLR, 'logreg_model.pkl'),
        )
        for heading, model, filename in results:
            if model is None:
                st.warning(f'Model tidak tersedia: berkas {filename} tidak ditemukan.')
                continue
            st.write(heading, Le.inverse_transform(model.predict(features)))


if __name__ == '__main__':
    run()
|
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import Prediction
|
3 |
+
import eda
|
4 |
+
|
5 |
+
# Sidebar page switcher; any choice other than 'Predict' renders the EDA page.
navigation = st.sidebar.selectbox('Pilih Halaman: ', ('Predict', 'EDA'))

render_page = Prediction.run if navigation == 'Predict' else eda.run
render_page()
|
cat_cols.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["GENDER"]
|
dt_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88766a40e0bdd36031d69dfeec60a997e2c1b904da5ab91bd2ae9d5519ee8657
|
3 |
+
size 8212
|
eda.py
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import seaborn as sns
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import plotly.express as px
|
6 |
+
from PIL import Image
|
7 |
+
|
8 |
+
# Page-wide Streamlit settings (browser tab title, wide layout, sidebar open).
# This call sits at module level, so it executes when the module is imported.
# NOTE(review): Streamlit documents set_page_config as having to be the first
# Streamlit command on a page and called only once -- confirm every entry
# point imports this module before issuing any other st.* call.
st.set_page_config(
    page_title= 'Prediksi Diagnosis Kanker Paru-Paru',
    layout='wide',
    initial_sidebar_state='expanded'
)
|
13 |
+
|
14 |
+
def _positive_rate(labels):
    """Return the percentage of entries in ``labels`` that equal 'YES'."""
    return (labels == 'YES').sum() / len(labels) * 100


def _render(fig):
    """Draw ``fig`` on the page, then close it.

    Figures created through pyplot stay registered until closed, so without
    the explicit close every rerun of the page would leave five more figures
    in memory.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the lung cancer survey.

    Shows the header image, the raw survey table and five charts: overall
    share of positive diagnoses, positive rate by gender, alcohol/smoking
    rate by gender, positive rate by alcohol/smoking habit, and positive
    rate by age group. The survey CSV is downloaded on every call.
    """
    # Close the image file once the resized copy has been produced.
    with Image.open('image.png') as image:
        resized_image = image.resize((300, 300))
    # Caption and title previously said "Serangan Jantung" (heart attack);
    # they now match the page title configured for this app.
    st.image(resized_image, caption='Kanker paru-paru')

    st.title('Prediksi Diagnosis Kanker Paru-Paru')
    df = pd.read_csv('https://raw.githubusercontent.com/Azrieldr/latihan/master/survey%20lung%20cancer.csv')
    st.dataframe(df)

    # Pie chart: overall share of positive vs. negative diagnoses.
    yes_percentage = (df['LUNG_CANCER'].value_counts(normalize=True) * 100)['YES']
    fig, ax = plt.subplots(figsize=(10, 15), dpi=100)
    ax.pie([yes_percentage, 100 - yes_percentage], labels=['Positif', 'Negatif'], autopct='%1.1f%%')
    ax.set_title('Persentase Diagnosis Kanker Paru-paru')
    _render(fig)

    # Bar chart: positive-diagnosis rate per gender.
    persentaseByGender = df.groupby('GENDER')['LUNG_CANCER'].apply(_positive_rate)
    print('persentase diagnosis kanker paru paru berdasarkan janis kelamin \n', persentaseByGender)
    fig, ax = plt.subplots(figsize=(8, 6))
    persentaseByGender.plot(kind='bar', ax=ax, color='#f4a7bb', rot=0)
    ax.set_title('Persentase Diagnosis Positif Berdasarkan Jenis Kelamin')
    ax.set_xlabel('Jenis Kelamin')
    ax.set_ylabel('Persentase (%)')
    _render(fig)

    # Work on a copy so the helper columns do not leak into the raw table.
    df1 = df.copy()

    # The survey encodes yes/no as 2/1; shift to 1/0 so the mean is a rate.
    df1['Konsumsi Alkohol'] = df1['ALCOHOL CONSUMING'] - 1
    df1['Konsumsi Rokok'] = df1['SMOKING'] - 1

    # Bar chart: alcohol and smoking rate per gender. Several columns must be
    # selected with a list; selecting with a bare tuple of keys is rejected
    # by pandas 2.x.
    persentaseByGender2 = df1.groupby('GENDER')[['Konsumsi Alkohol', 'Konsumsi Rokok']].mean() * 100
    persentaseByGender2 = persentaseByGender2.T
    fig, ax = plt.subplots(figsize=(8, 6))
    persentaseByGender2.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'], rot=0)
    ax.set_title('Persentase Konsumsi Alkohol dan Rokok Berdasarkan Jenis Kelamin')
    # After the transpose the x axis holds the habits; gender is the legend.
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    ax.legend(['F', 'M'])
    _render(fig)

    # Bar chart: positive-diagnosis rate for non-consumers vs. consumers.
    persentaseByAlc = df.groupby('ALCOHOL CONSUMING')['LUNG_CANCER'].apply(_positive_rate)
    persentaseBySmk = df.groupby('SMOKING')['LUNG_CANCER'].apply(_positive_rate)

    # Rename the series so the combined frame gets readable column names.
    persentaseByAlc = persentaseByAlc.rename('Alkohol')
    persentaseBySmk = persentaseBySmk.rename('Rokok')

    gayaHidup = pd.concat([persentaseByAlc, persentaseBySmk], axis=1)
    gayaHidup = gayaHidup.T

    fig, ax = plt.subplots(figsize=(8, 6))
    gayaHidup.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'], rot=0)
    ax.set_title('Persentase diagnosis berdasarkan gaya hidup')
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    ax.legend(['Non-konsumen', 'Konsumen'])
    _render(fig)

    # Bar chart: positive-diagnosis rate across five equal-width age bins.
    df1 = df.copy()
    df1['group'] = pd.cut(df1['AGE'], bins=5)

    # observed=False keeps every bin, including empty ones, which is the
    # historical default; stating it avoids the pandas FutureWarning.
    result = df1.groupby('group', observed=False)['LUNG_CANCER'].apply(_positive_rate)

    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.barplot(x=result.index, y=result, color='pink', ax=ax)
    ax.set(xlabel='AGE Group', ylabel='Percentage of LUNG_CANCER (YES)')
    _render(fig)


if __name__ == '__main__':
    run()
|
image.png
ADDED
![]() |
le.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b98c239c5a3fa55f0f5b62419b22d2ec1447437e4152dd7fa3cad0d119e1b2fa
|
3 |
+
size 253
|
logreg_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:24cbd766152b5de614599341af5878f4973cf2ee305454d563b9dedda9c745da
|
3 |
+
size 2963
|
num_cols.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["AGE", "SMOKING", "YELLOW_FINGERS", "ANXIETY", "PEER_PRESSURE", "CHRONIC DISEASE", "FATIGUE ", "ALLERGY ", "WHEEZING", "ALCOHOL CONSUMING", "COUGHING", "SHORTNESS OF BREATH", "SWALLOWING DIFFICULTY", "CHEST PAIN"]
|
preprocessor.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a626957a595c434e24a3d6b901c50091e2d6fe4b8e33858b0caca12b9087b18
|
3 |
+
size 725
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
seaborn
|
4 |
+
matplotlib
|
5 |
+
plotly
|
6 |
+
numpy
|
7 |
+
scikit-learn==1.0.2
|
8 |
+
Pillow
|
rf_model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e45e7da4c71b7dc406b3414ea0d210260c9ab75f07744c4f307abea5a685ac0
|
3 |
+
size 551007
|