RustX committed on
Commit
c85b155
1 Parent(s): 034c8bd

Create explore_page.py

Browse files
Files changed (1) hide show
  1. explore_page.py +75 -0
explore_page.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import matplotlib.pyplot as plt
4
+
5
def shorten_categories(categories, cutoff):
    """Map each category label to itself or to ``'Other'`` by frequency.

    Args:
        categories: Label -> count pairs, e.g. the result of
            ``Series.value_counts()``. Any object exposing ``.items()``
            (a pandas Series or a plain mapping) is accepted.
        cutoff: Minimum count a label needs in order to be kept as-is.

    Returns:
        dict: ``{label: label}`` for labels whose count is ``>= cutoff``,
        ``{label: 'Other'}`` for the rest.
    """
    return {
        label: (label if count >= cutoff else 'Other')
        for label, count in categories.items()
    }
13
+
14
def clean_experience(x):
    """Convert a years-of-experience survey answer to a float.

    Args:
        x: Either one of the two textual answers
            (``'More than 50 years'``, ``'Less than 1 year'``) or a value
            that ``float()`` can parse.

    Returns:
        float: ``50.0`` for ``'More than 50 years'``, ``0.5`` for
        ``'Less than 1 year'``, otherwise ``float(x)``.

    Raises:
        ValueError, TypeError: Propagated from ``float(x)`` when ``x`` is
            neither a known textual answer nor convertible to a number.
    """
    if x == 'More than 50 years':
        # Return a float here too so every branch yields the same type.
        return 50.0
    if x == 'Less than 1 year':
        return 0.5
    return float(x)
20
+
21
def clean_education(x):
    """Collapse a free-form education-level answer into one of four labels.

    Matching is by substring and checked in this order: bachelor's,
    master's, then professional/doctoral. Both the typographic apostrophe
    (U+2019) and the plain ASCII apostrophe are recognised on input; the
    returned labels always use the typographic form.

    Args:
        x: The education-level answer text.

    Returns:
        str: ``'Bachelor’s degree'``, ``'Master’s degree'``, ``'Post grad'``
        or ``'Less than a Bachelors'``.
    """
    if 'Bachelor’s degree' in x or "Bachelor's degree" in x:
        return 'Bachelor’s degree'
    if 'Master’s degree' in x or "Master's degree" in x:
        return 'Master’s degree'
    if 'Professional degree' in x or 'Other doctoral' in x:
        return 'Post grad'
    return 'Less than a Bachelors'
29
+
30
@st.cache_data
def load_data():
    """Load ``survey_results_public.csv`` and return a cleaned DataFrame.

    Steps, in order:
      1. Keep the Country, EdLevel, YearsCodePro, Employment and
         ConvertedCompYearly columns; rename the last one to ``Salary``.
      2. Drop rows with any missing value in those columns.
      3. Keep only rows whose Employment is ``"Employed, full-time"`` and
         drop the Employment column.
      4. Relabel countries with fewer than 400 remaining rows as
         ``'Other'``, then discard those rows.
      5. Keep salaries in the inclusive range 10000..250000.
      6. Normalise YearsCodePro and EdLevel with ``clean_experience`` and
         ``clean_education``.

    Returns:
        pandas.DataFrame: Columns Country, EdLevel, YearsCodePro, Salary.
    """
    df = pd.read_csv("survey_results_public.csv")
    df = df[["Country", "EdLevel", "YearsCodePro", "Employment", "ConvertedCompYearly"]]
    df = df.rename(columns={"ConvertedCompYearly": "Salary"})
    # dropna() removes rows with a missing Salary as well, so a separate
    # notnull() filter on Salary is not needed.
    df = df.dropna()

    full_time = df["Employment"] == "Employed, full-time"
    # Explicit copy: the column assignments below must not target a
    # filtered slice of another frame.
    df = df.loc[full_time].drop(columns="Employment").copy()

    # Country counts are taken before the salary filter is applied.
    country_map = shorten_categories(df["Country"].value_counts(), 400)
    df["Country"] = df["Country"].map(country_map)

    keep = df["Salary"].between(10000, 250000) & (df["Country"] != "Other")
    df = df.loc[keep].copy()

    df["YearsCodePro"] = df["YearsCodePro"].apply(clean_experience)
    df["EdLevel"] = df["EdLevel"].apply(clean_education)
    return df
49
+
50
# Module-level dataset used by show_explore_page(); built once at import
# time via load_data().
df = load_data()
51
+
52
def show_explore_page():
    """Render the salary exploration page.

    Reads the module-level ``df`` and writes, in order: a title and
    heading, a pie chart of row counts per country, a bar chart of mean
    salary per country, and a line chart of mean salary per
    ``YearsCodePro`` value.
    """
    st.title("Explore Software Engineer Salaries")

    st.write("""### Stack Overflow Developer Survey 2022""")

    data = df["Country"].value_counts()

    fig1, ax1 = plt.subplots()
    ax1.pie(data, labels=data.index, autopct="%1.1f%%", shadow=True, startangle=90)
    ax1.axis("equal")  # Equal aspect ratio ensures that pie is drawn as a circle.

    st.write("""#### Number of Data from different countries""")

    st.pyplot(fig1)
    # pyplot keeps a reference to every figure it creates; close this one
    # after rendering so figures do not pile up each time the page is drawn.
    plt.close(fig1)

    st.write("""#### Mean Salary Based On Country""")

    data = df.groupby(["Country"])["Salary"].mean().sort_values(ascending=True)
    st.bar_chart(data)

    st.write("""#### Mean Salary Based On Experience""")

    data = df.groupby(["YearsCodePro"])["Salary"].mean().sort_values(ascending=True)
    st.line_chart(data)