dev-sal-predict / explore_page.py
LawalAfeez's picture
app
144cad2
raw
history blame
2.46 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
def join_country(cat, cutoff):
    """Map each category label to itself or to the catch-all ``"others"``.

    Args:
        cat: Series of counts indexed by category label (for example the
            result of ``Series.value_counts()``).
        cutoff: Minimum count a label needs in order to keep its own name.

    Returns:
        dict: ``{label: label}`` for labels whose count is at least
        ``cutoff``; ``{label: "others"}`` for every other label.
    """
    return {
        label: (label if cat[label] >= cutoff else "others")
        for label in cat.index
    }
def clean_experience(x):
    """Convert a years-of-experience survey answer to a number.

    The two textual answers are replaced by fixed values (50 and 0.5);
    anything else is passed through ``float``.

    Args:
        x: Raw answer, either one of the two textual buckets or a value
            that ``float`` can parse.

    Returns:
        The numeric number of years.

    Raises:
        ValueError: If ``x`` is a string that ``float`` cannot parse.
    """
    textual_answers = (
        ("More than 50 years", 50),
        ("Less than 1 year", 0.5),
    )
    for answer, years in textual_answers:
        if x == answer:
            return years
    return float(x)
def clean_education(x):
    """Collapse an education-level answer into one of four coarse labels.

    Args:
        x: Raw education answer as a string.

    Returns:
        str: ``"Bachelor’s degree"``, ``"Master’s degree"``, ``"Post grad"``
        or, when no known fragment occurs in ``x``,
        ``"Less than a Bachelors"``.
    """
    # Checked in order; the first group with a fragment found in x wins.
    levels = (
        (("Bachelor’s degree",), "Bachelor’s degree"),
        (("Master’s degree",), "Master’s degree"),
        (("Professional degree", "Other doctoral"), "Post grad"),
    )
    for fragments, label in levels:
        if any(fragment in x for fragment in fragments):
            return label
    return "Less than a Bachelors"
# NOTE(review): newer Streamlit releases deprecate st.cache in favour of
# st.cache_data -- confirm the targeted Streamlit version before switching.
@st.cache
def load_data():
    """Read ``survey.csv`` and return the cleaned salary table.

    Steps, in order:
      1. Keep the country, education, experience, employment and
         compensation columns; rename ``ConvertedComp`` to ``Salary``.
      2. Drop rows with any missing value.
      3. Keep only ``"Employed full-time"`` rows, then drop ``Employment``.
      4. Relabel countries with fewer than 400 remaining rows as
         ``"others"``.
      5. Keep salaries in the range (10000, 250000] and discard the
         ``"others"`` countries.
      6. Convert ``YearsCodePro`` to numbers and coarsen ``EdLevel``.

    Returns:
        pandas.DataFrame with columns ``Country``, ``EdLevel``,
        ``YearsCodePro`` and ``Salary``.
    """
    survey = pd.read_csv("survey.csv")
    wanted = [
        "Country",
        "EdLevel",
        "YearsCodePro",
        "Employment",
        "ConvertedComp",
    ]
    frame = survey[wanted].rename(columns={"ConvertedComp": "Salary"})

    # Dropping rows with any missing value also removes rows without a salary.
    frame = frame.dropna()

    full_time = frame["Employment"] == "Employed full-time"
    frame = frame[full_time].drop(columns="Employment")

    # Country counts are taken after the employment filter but before the
    # salary filter, so the 400-row cutoff applies to full-time respondents.
    country_counts = frame["Country"].value_counts()
    frame["Country"] = frame["Country"].map(join_country(country_counts, 400))

    salary = frame["Salary"]
    salary_in_range = (salary <= 250000) & (salary > 10000)
    named_country = frame["Country"] != "others"
    frame = frame[salary_in_range & named_country]

    frame["YearsCodePro"] = frame["YearsCodePro"].apply(clean_experience)
    frame["EdLevel"] = frame["EdLevel"].apply(clean_education)
    return frame
# Cleaned survey table, loaded once at import time and read by
# show_explore_page below.
data = load_data()
def show_explore_page():
    """Render the salary exploration page in the Streamlit app.

    Draws three views of the module-level ``data`` table:
      * a pie chart of the number of rows per country,
      * a bar chart of the mean salary per country,
      * a line chart of the mean salary per years of experience.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title("Explore Software Engineer Average Salary")
    st.write("""### Stack Overflow Developer Salary""")

    country_counts = data["Country"].value_counts()
    fig, ax = plt.subplots()
    try:
        ax.pie(
            country_counts,
            labels=country_counts.index,
            shadow=True,
            startangle=90,
        )
        # Equal aspect ratio so the pie is drawn as a circle.
        ax.axis("equal")
        st.write("""#### Number Of Data From Each Country""")
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive in a global registry until it is
        # closed; Streamlit reruns this function on each interaction, so
        # without this the figures would pile up in memory.
        plt.close(fig)

    st.write("""#### Mean Salary Base On The Country""")
    salary_by_country = (
        data.groupby(["Country"])["Salary"].mean().sort_values(ascending=True)
    )
    st.bar_chart(salary_by_country)

    st.write("""#### Mean Salary Base On The Experience""")
    salary_by_experience = (
        data.groupby(["YearsCodePro"])["Salary"].mean().sort_values(ascending=True)
    )
    st.line_chart(salary_by_experience)