Azrieldr committed on
Commit
e5258c2
1 Parent(s): dbb80e9

main commit

Browse files
Files changed (12) hide show
  1. Prediction.py +130 -0
  2. app.py +10 -0
  3. cat_cols.json +1 -0
  4. dt_model.pkl +3 -0
  5. eda.py +101 -0
  6. image.png +0 -0
  7. le.pkl +3 -0
  8. logreg_model.pkl +3 -0
  9. num_cols.json +1 -0
  10. preprocessor.pkl +3 -0
  11. requirements.txt +8 -0
  12. rf_model.pkl +3 -0
Prediction.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pickle
3
+ import json
4
+ import pandas as pd
5
+ # from sklearn.pipeline import make_pipeline
6
+ # from sklearn.preprocessing import StandardScaler, OneHotEncoder
7
+ # from sklearn.svm import SVC
8
+ # from sklearn.linear_model import LogisticRegression
9
+ # from sklearn.tree import DecisionTreeClassifier
10
+ # from sklearn.ensemble import RandomForestClassifier
11
+
12
+
13
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    NOTE: unpickling can execute arbitrary code, so only artifacts produced
    by this project's own training run should be placed next to this script.
    """
    with open(path, 'rb') as f:
        return pickle.load(f)


def _load_optional_model(path):
    """Return the unpickled model at ``path``, or ``None`` when the file is absent.

    A missing model must not take the whole app down at import time; ``run``
    skips models that are ``None`` and tells the user about it instead.
    """
    try:
        return _load_pickle(path)
    except FileNotFoundError:
        return None


def _load_json(path):
    """Parse and return the JSON document stored at ``path``."""
    with open(path, 'r') as f:
        return json.load(f)


# Fitted classifiers. NOTE(review): svc_model.pkl is not among the artifacts
# added together with this script, so it may be missing at runtime - confirm.
pipesvc = _load_optional_model('svc_model.pkl')
pipeLR = _load_optional_model('logreg_model.pkl')
pipeDT = _load_optional_model('dt_model.pkl')
pipeRF = _load_optional_model('rf_model.pkl')

# Required artifacts: a missing file here still raises FileNotFoundError.
preprocessor = _load_pickle('preprocessor.pkl')
Le = _load_pickle('le.pkl')

numerical_cols = _load_json('num_cols.json')
categorical_cols = _load_json('cat_cols.json')

# (feature column, question shown to the user), in the order the widgets are
# rendered and the columns are placed in the inference frame.  The trailing
# spaces in 'FATIGUE ' and 'ALLERGY ' are intentional: they match the column
# names listed in num_cols.json.
_YES_NO_QUESTIONS = (
    ('SMOKING', 'Apakah merokok?'),
    ('YELLOW_FINGERS', 'Apakah memiliki Yellow Finger?'),
    ('ANXIETY', 'Apakah memiliki Anxeity?'),
    ('PEER_PRESSURE', 'Apakah terdapat peer pressure?'),
    ('CHRONIC DISEASE', 'Apakah memiliki penyakit Kronis?'),
    ('FATIGUE ', 'Apakah mudah capai?'),
    ('ALLERGY ', 'Apakah memiliki alergi?'),
    ('WHEEZING', 'Apakah mengidap mengi?'),
    ('ALCOHOL CONSUMING', 'Apakah mengkonsumsi alkohol?'),
    ('COUGHING', 'Apakah ada batuk?'),
    ('SHORTNESS OF BREATH', 'Apakah terdapat sesak?'),
    ('SWALLOWING DIFFICULTY', 'Apakah susah untuk menalan?'),
    ('CHEST PAIN', 'Apakah terdapat sakit di bagian dada?'),
)


def _ask_yes_no(label):
    """Render a Ya/Tidak radio button and return 2 for 'Ya', 1 for 'Tidak'."""
    return 2 if st.radio(label, ('Ya', 'Tidak')) == 'Ya' else 1


def run():
    """Render the prediction form and, once submitted, show each model's result.

    The form collects gender, age and thirteen yes/no answers, builds a
    one-row DataFrame from them and passes it to every loaded classifier.
    Predictions are decoded with ``Le.inverse_transform`` before display.
    Models whose pickle file was not found are skipped with a warning.
    """
    with st.form(key='form_prediksi'):
        # The name is only collected in the form; it is not a model feature.
        st.text_input('Nama', value='')
        sex = st.radio('Kelamin', ('Perempuan', 'Laki-Laki'))
        age = st.number_input('Umur', min_value=16, max_value=80, value=50, step=1)
        answers = {column: _ask_yes_no(label)
                   for column, label in _YES_NO_QUESTIONS}
        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    record = {'GENDER': 'M' if sex == 'Laki-Laki' else 'F', 'AGE': age}
    record.update(answers)
    data_inf = pd.DataFrame([record])

    # Same display order as before: Random Forest, Decision Tree, SVC, LogReg.
    reports = (
        ('# hasil inf dari Randomforest', pipeRF, 'rf_model.pkl'),
        ('# hasil inf dari Decision Tree', pipeDT, 'dt_model.pkl'),
        ('# hasil inf dari SVC', pipesvc, 'svc_model.pkl'),
        ('# hasil inf dari Logistic regression', pipeLR, 'logreg_model.pkl'),
    )
    for heading, model, filename in reports:
        if model is None:
            st.warning(f'{filename} tidak ditemukan, prediksi model ini dilewati.')
            continue
        st.write(heading, Le.inverse_transform(model.predict(data_inf)))


if __name__ == '__main__':
    run()
app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import Prediction
3
+ import eda
4
+
5
# Sidebar page selector: 'Predict' opens the prediction form, any other
# choice opens the exploratory data analysis page.
navigation = st.sidebar.selectbox('Pilih Halaman: ', ('Predict', 'EDA'))

selected_page = Prediction if navigation == 'Predict' else eda
selected_page.run()
cat_cols.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["GENDER"]
dt_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88766a40e0bdd36031d69dfeec60a997e2c1b904da5ab91bd2ae9d5519ee8657
3
+ size 8212
eda.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import plotly.express as px
6
+ from PIL import Image
7
+
8
# Page-wide Streamlit settings: browser tab title, wide layout, sidebar open.
# NOTE(review): this call sits at module level, so it executes when the module
# is imported rather than when run() is called - confirm that is intended.
_page_settings = {
    'page_title': 'Prediksi Diagnosis Kanker Paru-Paru',
    'layout': 'wide',
    'initial_sidebar_state': 'expanded',
}
st.set_page_config(**_page_settings)
13
+
14
+ def run():
15
+ image = Image.open('image.png')
16
+ resized_image = image.resize((300, 300))
17
+ st.image(resized_image, caption='Serangan jantung')
18
+
19
+ st.title('Prediksi Serangan Jantung')
20
+ df = pd.read_csv('https://raw.githubusercontent.com/Azrieldr/latihan/master/survey%20lung%20cancer.csv')
21
+ st.dataframe(df)
22
+
23
+
24
+ yes_percentage = (df['LUNG_CANCER'].value_counts(normalize=True)*100)['YES']
25
+ # Create pie chart
26
+ fig, ax = plt.subplots(figsize=(10,15), dpi=100)
27
+ ax.pie([yes_percentage, 100-yes_percentage], labels=['Positif', 'Negatif'], autopct='%1.1f%%')
28
+ ax.set_title('Persentase Diagnosis Kanker Paru-paru')
29
+ st.pyplot(fig)
30
+
31
+ persentaseByGender=df.groupby('GENDER')['LUNG_CANCER'].apply(lambda x: (x == 'YES').sum() / len(x) * 100)
32
+ print('persentase diagnosis kanker paru paru berdasarkan janis kelamin \n', persentaseByGender)
33
+ fig, ax = plt.subplots(figsize=(8, 6))
34
+ persentaseByGender.plot(kind='bar', ax=ax, color='#f4a7bb')
35
+ ax.set_title('Persentase Diagnosis Positif Berdasarkan Jenis Kelamin')
36
+ ax.set_xlabel('Jenis Kelamin')
37
+ ax.set_ylabel('Persentase (%)')
38
+ plt.xticks(rotation=0)
39
+ st.pyplot(fig)
40
+
41
+
42
+ # membuat dataframe copy dari dataframe awal
43
+ df1 = df.copy()
44
+
45
+ # membuat kolom baru dengan 1 berarti ya dan 0 berarti tidak
46
+ df1['Konsumsi Alkohol']=df1['ALCOHOL CONSUMING']-1
47
+ df1['Konsumsi Rokok']=df1['SMOKING']-1
48
+
49
+ #membuat table baru
50
+ persentaseByGender2=df1.groupby('GENDER')['Konsumsi Alkohol','Konsumsi Rokok'].mean()*100
51
+ persentaseByGender2=persentaseByGender2.T
52
+ fig, ax = plt.subplots(figsize=(8, 6))
53
+ persentaseByGender2.plot(kind='bar', ax=ax, color=['#f4a7bb','black'])
54
+ ax.set_title('Persentase Konsumsi Alkohol dan Rokok Berdasarkan Jenis Kelamin')
55
+ ax.set_xlabel('Jenis Kelamin')
56
+ ax.set_ylabel('Persentase (%)')
57
+ plt.xticks(rotation=0)
58
+ plt.legend(['F', 'M'])
59
+ st.pyplot(fig)
60
+
61
+ persentaseByAlc=df.groupby('ALCOHOL CONSUMING')['LUNG_CANCER'].apply(lambda x: (x == 'YES').sum() / len(x) * 100)
62
+ persentaseBySmk=df.groupby('SMOKING')['LUNG_CANCER'].apply(lambda x: (x == 'YES').sum() / len(x) * 100)
63
+
64
+ #merename sehingga nama kolom dari series baru yang akan dibuat berubah
65
+ persentaseByAlc=persentaseByAlc.rename('Alkohol')
66
+ persentaseBySmk=persentaseBySmk.rename('Rokok')
67
+
68
+ #sambungkan
69
+ gayaHidup=pd.concat([persentaseByAlc, persentaseBySmk], axis=1)
70
+ gayaHidup=gayaHidup.T
71
+
72
+ #membuat barplot
73
+ fig, ax = plt.subplots(figsize=(8, 6))
74
+ gayaHidup.plot(kind='bar', ax=ax, color=['#f4a7bb','black'])
75
+ ax.set_title('Persentase diagnosis berdasarkan gaya hidup')
76
+ ax.set_xlabel('Gaya Hidup')
77
+ ax.set_ylabel('Persentase (%)')
78
+ plt.xticks(rotation=0)
79
+ plt.legend(['Non-konsumen', 'Konsumen'])
80
+ st.pyplot(fig)
81
+
82
+ # membuat dataframe copy dari dataframe awal
83
+ df1 = df.copy()
84
+
85
+ # mengelompokkan data pada kolom AGE menjadi 5 kelompok
86
+ df1['group'] = pd.cut(df1['AGE'], bins=5)
87
+
88
+ # menghitung nilai rata-rata pada kolom LUNG_CANCER untuk setiap kelompok
89
+ result = df1.groupby('group')['LUNG_CANCER'].apply(lambda x: (x == 'YES').sum() / len(x) * 100)
90
+
91
+ # plot hasilnya menggunakan seaborn dengan barplot berwarna pink
92
+
93
+ sns.set_style('whitegrid')
94
+ fig, ax = plt.subplots(figsize=(8, 6))
95
+ ax = sns.barplot(x=result.index, y=result, color='pink')
96
+ ax.set(xlabel='AGE Group', ylabel='Percentage of LUNG_CANCER (YES)')
97
+ st.pyplot(fig)
98
+
99
+
100
+ if __name__== '__main__':
101
+ run()
image.png ADDED
le.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98c239c5a3fa55f0f5b62419b22d2ec1447437e4152dd7fa3cad0d119e1b2fa
3
+ size 253
logreg_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24cbd766152b5de614599341af5878f4973cf2ee305454d563b9dedda9c745da
3
+ size 2963
num_cols.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["AGE", "SMOKING", "YELLOW_FINGERS", "ANXIETY", "PEER_PRESSURE", "CHRONIC DISEASE", "FATIGUE ", "ALLERGY ", "WHEEZING", "ALCOHOL CONSUMING", "COUGHING", "SHORTNESS OF BREATH", "SWALLOWING DIFFICULTY", "CHEST PAIN"]
preprocessor.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a626957a595c434e24a3d6b901c50091e2d6fe4b8e33858b0caca12b9087b18
3
+ size 725
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ pandas
3
+ seaborn
4
+ matplotlib
5
+ plotly
6
+ numpy
7
+ scikit-learn==1.0.2
8
+ Pillow
rf_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e45e7da4c71b7dc406b3414ea0d210260c9ab75f07744c4f307abea5a685ac0
3
+ size 551007