# Milestone2-p1 / eda.py
# Azrieldr
# main commit
# dbcd01b
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level Streamlit settings: browser-tab title, wide layout, and a
# sidebar that starts expanded.
st.set_page_config(
    page_title='Prediksi Diagnosis Kanker Paru-Paru',
    layout='wide',
    initial_sidebar_state='expanded',
)
def _positive_rate(diagnosis):
    """Return the percentage of entries in *diagnosis* that equal 'YES'."""
    return (diagnosis == 'YES').sum() / len(diagnosis) * 100


def _render(fig):
    """Send *fig* to the Streamlit page, then close it.

    Figures created through ``plt.subplots`` stay registered with pyplot
    until they are closed explicitly, so closing here keeps them from
    accumulating each time the page is rendered.
    """
    st.pyplot(fig)
    plt.close(fig)


def run():
    """Render the lung-cancer exploratory-data-analysis page.

    Shows a header image and title, the raw survey table, and five charts:
    overall diagnosis share, positive rate by gender, alcohol/smoking
    consumption by gender, positive rate by lifestyle habit, and positive
    rate by age group.

    Reads ``image.png`` from the working directory and downloads the survey
    CSV over HTTP; errors from either operation propagate to the caller.
    """
    # Header image; the context manager releases the file handle once the
    # resized copy has been produced.
    with Image.open('image.png') as image:
        resized_image = image.resize((300, 300))
    st.image(resized_image, caption='Kanker Paru-paru')
    st.title('Prediksi Kanker Paru-paru')

    df = pd.read_csv('https://raw.githubusercontent.com/Azrieldr/latihan/master/survey%20lung%20cancer.csv')
    st.dataframe(df)

    # --- Overall share of positive diagnoses (pie chart) ---
    # .get() avoids a KeyError when the data contains no 'YES' row.
    yes_percentage = df['LUNG_CANCER'].value_counts(normalize=True).get('YES', 0.0) * 100
    fig, ax = plt.subplots(figsize=(10, 15), dpi=100)
    ax.pie([yes_percentage, 100 - yes_percentage], labels=['Positif', 'Negatif'], autopct='%1.1f%%')
    ax.set_title('Persentase Diagnosis Kanker Paru-paru')
    _render(fig)

    # --- Positive rate by gender ---
    persentaseByGender = df.groupby('GENDER')['LUNG_CANCER'].apply(_positive_rate)
    # Written to stdout, not to the page.
    print('persentase diagnosis kanker paru paru berdasarkan janis kelamin \n', persentaseByGender)
    fig, ax = plt.subplots(figsize=(8, 6))
    persentaseByGender.plot(kind='bar', ax=ax, color='#f4a7bb')
    ax.set_title('Persentase Diagnosis Positif Berdasarkan Jenis Kelamin')
    ax.set_xlabel('Jenis Kelamin')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    _render(fig)

    # --- Alcohol and smoking consumption by gender ---
    # Work on a copy so the displayed dataframe keeps its original columns.
    df1 = df.copy()
    # New columns where 1 means yes and 0 means no (source columns minus 1).
    df1['Konsumsi Alkohol'] = df1['ALCOHOL CONSUMING'] - 1
    df1['Konsumsi Rokok'] = df1['SMOKING'] - 1
    # Select the columns with a list: tuple selection on a groupby raises
    # on pandas >= 2.0.
    persentaseByGender2 = df1.groupby('GENDER')[['Konsumsi Alkohol', 'Konsumsi Rokok']].mean() * 100
    # Transpose so each habit is an x-axis category with one bar per gender.
    persentaseByGender2 = persentaseByGender2.T
    fig, ax = plt.subplots(figsize=(8, 6))
    persentaseByGender2.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'])
    ax.set_title('Persentase Konsumsi Alkohol dan Rokok Berdasarkan Jenis Kelamin')
    # After the transpose the x axis holds the habits (gender is in the
    # legend), so the axis is labelled as lifestyle rather than gender.
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    ax.legend(['F', 'M'])
    _render(fig)

    # --- Positive rate by lifestyle habit ---
    # Rename each series so the combined table gets readable row names.
    persentaseByAlc = df.groupby('ALCOHOL CONSUMING')['LUNG_CANCER'].apply(_positive_rate).rename('Alkohol')
    persentaseBySmk = df.groupby('SMOKING')['LUNG_CANCER'].apply(_positive_rate).rename('Rokok')
    # Combine side by side, then transpose so habits become the x categories.
    gayaHidup = pd.concat([persentaseByAlc, persentaseBySmk], axis=1).T
    fig, ax = plt.subplots(figsize=(8, 6))
    gayaHidup.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'])
    ax.set_title('Persentase diagnosis berdasarkan gaya hidup')
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    ax.tick_params(axis='x', rotation=0)
    ax.legend(['Non-konsumen', 'Konsumen'])
    _render(fig)

    # --- Positive rate by age group ---
    df1 = df.copy()
    # Split AGE into 5 equal-width bins.
    df1['group'] = pd.cut(df1['AGE'], bins=5)
    # Percentage of positive diagnoses within each age bin.
    result = df1.groupby('group')['LUNG_CANCER'].apply(_positive_rate)
    sns.set_style('whitegrid')
    fig, ax = plt.subplots(figsize=(8, 6))
    # Draw on the axes created above instead of relying on the implicit
    # "current axes".
    sns.barplot(x=result.index, y=result, color='pink', ax=ax)
    ax.set(xlabel='AGE Group', ylabel='Percentage of LUNG_CANCER (YES)')
    _render(fig)
# Script entry point: build the page only when this file is executed
# directly rather than imported.
if __name__ == '__main__':
    run()