# Repository-viewer header captured together with the file (not program code):
# Milestone2-p1 / eda.py
# Azrieldr
# main commit
# dbcd01b
# raw
# history blame contribute delete
# No virus
# 3.76 kB
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from PIL import Image
# Page-level Streamlit settings: page title, wide layout and an initially
# expanded sidebar.
st.set_page_config(
    initial_sidebar_state='expanded',
    layout='wide',
    page_title='Prediksi Diagnosis Kanker Paru-Paru',
)
def _positive_rate(df, by):
    """Return the percentage of ``LUNG_CANCER == 'YES'`` rows per group.

    Args:
        df: Survey dataframe containing a ``LUNG_CANCER`` column.
        by: Name of the column of ``df`` to group on.

    Returns:
        A Series named ``LUNG_CANCER``, indexed by the distinct values of
        ``by``, holding percentages in the range 0-100.
    """
    is_positive = df['LUNG_CANCER'].eq('YES')
    # The mean of a boolean column is the share of True values.
    # observed=False keeps every bin of a categorical grouper (e.g. the
    # output of pd.cut), which is what the pre-3.0 pandas default did.
    return is_positive.groupby(df[by], observed=False).mean() * 100


def _show_figure(fig):
    """Render ``fig`` on the Streamlit page and then release it."""
    st.pyplot(fig)
    # Figures created through plt.subplots stay registered with pyplot until
    # they are closed; without this they pile up every time the page reruns.
    plt.close(fig)


def run():
    """Render the lung-cancer survey EDA page.

    Shows a header image and title, the raw survey table, and five charts:
    overall diagnosis share, positive rate by gender, alcohol/smoking
    consumption by gender, positive rate by lifestyle habit, and positive
    rate by age group.

    Reads ``image.png`` from the working directory and downloads the survey
    CSV from GitHub. Returns nothing; all output goes to Streamlit (plus one
    debug ``print`` to stdout).
    """
    # --- Header -----------------------------------------------------------
    with Image.open('image.png') as image:
        resized_image = image.resize((300, 300))
    st.image(resized_image, caption='Kanker Paru-paru')
    st.title('Prediksi Kanker Paru-paru')

    # --- Data -------------------------------------------------------------
    df = pd.read_csv('https://raw.githubusercontent.com/Azrieldr/latihan/master/survey%20lung%20cancer.csv')
    st.dataframe(df)

    # --- Overall share of positive diagnoses (pie chart) --------------------
    # Computed from a boolean mask so that a dataset without any 'YES' row
    # yields 0 instead of raising KeyError.
    yes_percentage = df['LUNG_CANCER'].eq('YES').mean() * 100
    fig, ax = plt.subplots(figsize=(10, 15), dpi=100)
    ax.pie(
        [yes_percentage, 100 - yes_percentage],
        labels=['Positif', 'Negatif'],
        autopct='%1.1f%%',
    )
    ax.set_title('Persentase Diagnosis Kanker Paru-paru')
    _show_figure(fig)

    # --- Positive rate by gender ------------------------------------------
    persentase_by_gender = _positive_rate(df, 'GENDER')
    # Debug output: goes to the process's stdout, not to the page.
    print('persentase diagnosis kanker paru paru berdasarkan janis kelamin \n', persentase_by_gender)
    fig, ax = plt.subplots(figsize=(8, 6))
    # rot=0 keeps the tick labels horizontal without touching pyplot's
    # implicit "current axes" state.
    persentase_by_gender.plot(kind='bar', ax=ax, color='#f4a7bb', rot=0)
    ax.set_title('Persentase Diagnosis Positif Berdasarkan Jenis Kelamin')
    ax.set_xlabel('Jenis Kelamin')
    ax.set_ylabel('Persentase (%)')
    _show_figure(fig)

    # --- Alcohol / smoking consumption by gender ----------------------------
    # Work on a copy so the displayed dataframe is left untouched.
    df1 = df.copy()
    # Shift the survey coding down by one so that 1 means yes and 0 means no;
    # the column mean is then the share of consumers.
    df1['Konsumsi Alkohol'] = df1['ALCOHOL CONSUMING'] - 1
    df1['Konsumsi Rokok'] = df1['SMOKING'] - 1
    # Columns must be selected with a list: selecting several groupby
    # columns with a bare tuple raises ValueError on pandas 2.x.
    persentase_by_gender2 = (
        df1.groupby('GENDER')[['Konsumsi Alkohol', 'Konsumsi Rokok']].mean() * 100
    )
    # Transpose: rows (x axis) become the consumption type, columns (bar
    # colours) become the gender.
    persentase_by_gender2 = persentase_by_gender2.T
    fig, ax = plt.subplots(figsize=(8, 6))
    persentase_by_gender2.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'], rot=0)
    ax.set_title('Persentase Konsumsi Alkohol dan Rokok Berdasarkan Jenis Kelamin')
    # After the transpose the x axis lists the consumption columns, not the
    # genders, so it is labelled accordingly.
    ax.set_xlabel('Jenis Konsumsi')
    ax.set_ylabel('Persentase (%)')
    # Legend entries follow the actual gender columns instead of a
    # hard-coded ['F', 'M'] list.
    ax.legend([str(gender) for gender in persentase_by_gender2.columns])
    _show_figure(fig)

    # --- Positive rate by lifestyle habit ---------------------------------
    # Rename each Series so the concatenated table gets readable labels.
    gaya_hidup = pd.concat(
        [
            _positive_rate(df, 'ALCOHOL CONSUMING').rename('Alkohol'),
            _positive_rate(df, 'SMOKING').rename('Rokok'),
        ],
        axis=1,
    ).T
    fig, ax = plt.subplots(figsize=(8, 6))
    gaya_hidup.plot(kind='bar', ax=ax, color=['#f4a7bb', 'black'], rot=0)
    ax.set_title('Persentase diagnosis berdasarkan gaya hidup')
    ax.set_xlabel('Gaya Hidup')
    ax.set_ylabel('Persentase (%)')
    # Columns are the raw survey codes in ascending order: the lower code is
    # the non-consumer, the higher code the consumer.
    ax.legend(['Non-konsumen', 'Konsumen'])
    _show_figure(fig)

    # --- Positive rate by age group ---------------------------------------
    # Split AGE into 5 equal-width bins and compute the positive rate per bin.
    binned = df.assign(group=pd.cut(df['AGE'], bins=5))
    result = _positive_rate(binned, 'group')
    # Apply the whitegrid style only while this figure is created;
    # sns.set_style would change global settings and leak into the other
    # charts on later reruns.
    with sns.axes_style('whitegrid'):
        fig, ax = plt.subplots(figsize=(8, 6))
        sns.barplot(x=result.index, y=result, color='pink', ax=ax)
    ax.set(xlabel='AGE Group', ylabel='Percentage of LUNG_CANCER (YES)')
    _show_figure(fig)
# Script entry point: build the page only when this file is executed
# directly, not when it is imported.
if __name__ == '__main__':
    run()