File size: 3,757 Bytes
e5258c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dbcd01b
e5258c2
dbcd01b
e5258c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import streamlit as st
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image

# Page-level Streamlit settings: browser tab title, wide layout, and a sidebar
# that starts expanded.
st.set_page_config(
    layout='wide',
    initial_sidebar_state='expanded',
    page_title='Prediksi Diagnosis Kanker Paru-Paru',
)

def _positive_rate(diagnosis):
    """Return the percentage of 'YES' entries in *diagnosis* (NaN when empty)."""
    return diagnosis.eq('YES').mean() * 100


def _show_figure(fig):
    """Render *fig* in the Streamlit page, then close it to release its memory."""
    st.pyplot(fig)
    # Streamlit re-executes the script on every interaction; figures that are
    # never closed stay registered in pyplot and accumulate.
    plt.close(fig)


def run():
    """Render the exploratory-data-analysis page for the lung cancer survey.

    Shows a header image, the raw survey table, and five charts: the overall
    share of positive diagnoses, the positive rate by gender, alcohol and
    smoking rates by gender, the positive rate by alcohol/smoking habit, and
    the positive rate by age group.

    Reads 'image.png' from the current working directory and downloads the
    survey CSV from GitHub on every call.
    """
    # Header image; the context manager closes the underlying file handle once
    # the resized copy has been produced.
    with Image.open('image.png') as banner:
        resized_image = banner.resize((300, 300))
    st.image(resized_image, caption='Kanker Paru-paru')

    st.title('Prediksi Kanker Paru-paru')
    df = pd.read_csv('https://raw.githubusercontent.com/Azrieldr/latihan/master/survey%20lung%20cancer.csv')
    st.dataframe(df)

    # --- Overall share of positive diagnoses (pie chart) ---
    # Missing labels are dropped so the percentage is taken over answered rows
    # only; a dataset without any 'YES' yields 0 instead of raising KeyError.
    yes_percentage = _positive_rate(df['LUNG_CANCER'].dropna())
    fig, ax = plt.subplots(figsize=(10, 15), dpi=100)
    ax.pie(
        [yes_percentage, 100 - yes_percentage],
        labels=['Positif', 'Negatif'],
        autopct='%1.1f%%',
    )
    ax.set_title('Persentase Diagnosis Kanker Paru-paru')
    _show_figure(fig)

    # --- Positive rate by gender ---
    rate_by_gender = df.groupby('GENDER')['LUNG_CANCER'].apply(_positive_rate)
    fig, ax = plt.subplots(figsize=(8, 6))
    rate_by_gender.plot(kind='bar', ax=ax, color='#f4a7bb')
    ax.set_title('Persentase Diagnosis Positif Berdasarkan Jenis Kelamin')
    ax.set_xlabel('Jenis Kelamin')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    _show_figure(fig)

    # --- Alcohol and smoking rates by gender ---
    # Subtracting 1 turns the survey codes into 0/1 flags so that the group
    # mean is a rate (assumes the columns are coded 1 = no, 2 = yes, as the
    # original comment stated -- TODO confirm against the dataset).
    habits = df[['ALCOHOL CONSUMING', 'SMOKING']].sub(1)
    habits.columns = ['Konsumsi Alkohol', 'Konsumsi Rokok']
    # Grouping a frame that holds only the two habit columns avoids the
    # tuple-style column selection on a GroupBy, which pandas 2.0 removed.
    habit_rate_by_gender = (habits.groupby(df['GENDER']).mean() * 100).T
    fig, ax = plt.subplots(figsize=(8, 6))
    habit_rate_by_gender.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'])
    ax.set_title('Persentase Konsumsi Alkohol dan Rokok Berdasarkan Jenis Kelamin')
    # The x axis lists the two habits; gender is encoded in the legend.
    ax.set_xlabel('Jenis Konsumsi')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    ax.legend(habit_rate_by_gender.columns.tolist())
    _show_figure(fig)

    # --- Positive rate by lifestyle habit ---
    # Each series is renamed so the combined table gets readable row labels.
    rate_by_alcohol = (
        df.groupby('ALCOHOL CONSUMING')['LUNG_CANCER']
        .apply(_positive_rate)
        .rename('Alkohol')
    )
    rate_by_smoking = (
        df.groupby('SMOKING')['LUNG_CANCER']
        .apply(_positive_rate)
        .rename('Rokok')
    )
    lifestyle = pd.concat([rate_by_alcohol, rate_by_smoking], axis=1).T
    fig, ax = plt.subplots(figsize=(8, 6))
    lifestyle.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'])
    ax.set_title('Persentase diagnosis berdasarkan gaya hidup')
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    ax.legend(['Non-konsumen', 'Konsumen'])
    _show_figure(fig)

    # --- Positive rate by age group ---
    # Split AGE into 5 equal-width bins and compute the positive rate per bin.
    age_group = pd.cut(df['AGE'], bins=5).rename('group')
    # observed=False keeps empty bins (as NaN) and states the choice
    # explicitly instead of relying on the changing pandas default.
    result = (
        df.groupby(age_group, observed=False)['LUNG_CANCER']
        .apply(_positive_rate)
    )
    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(8, 6))
    # Draw on the axes created above rather than on pyplot's implicit
    # "current" axes.
    sns.barplot(x=result.index, y=result, color='pink', ax=ax)
    ax.set(xlabel='AGE Group', ylabel='Percentage of LUNG_CANCER (YES)')
    _show_figure(fig)


# Script entry point: render the page when this file is executed directly.
if __name__ == '__main__':
    run()