musk12 committed on
Commit
12269bd
1 Parent(s): 9e40636

Update olympic_app.py

Browse files
Files changed (1) hide show
  1. olympic_app.py +219 -214
olympic_app.py CHANGED
@@ -1,214 +1,219 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import functions
4
- import important
5
- import plotly.express as px
6
- import matplotlib.pyplot as plt
7
- import seaborn as sns
8
- import plotly.figure_factory as ff
9
-
10
-
11
- df = pd.read_csv("athlete_events.csv")
12
- region_df = pd.read_csv("noc_regions.csv")
13
-
14
- df = functions.preprocess1(df, region_df)
15
-
16
-
17
- #st.title("Olympic Data Analysis.")
18
-
19
- st.sidebar.title("Olympic Data Analysis.")
20
- st.sidebar.image("Olympic_image.jpg")
21
- user_menu = st.sidebar.radio("Select an option", ("Overview", "Overall Analysis", "Medal tally", "Country-wise Analysis", "Athlete-wise Analysis"))
22
-
23
- if user_menu == "Overview":
24
- st.image("Analysis_image.png")
25
-
26
- if user_menu == "Overall Analysis":
27
- editions = df["Year"].unique().shape[0]-1
28
- cities = df["City"].unique().shape[0]
29
- sports = df["Sport"].unique().shape[0]
30
- events = df["Event"].unique().shape[0]
31
- athletes = df["Name"].unique().shape[0]
32
- nations = df["region"].unique().shape[0]
33
-
34
- st.title("Top Statistics")
35
-
36
- col1, col2, col3 = st.columns(3)
37
-
38
- with col1:
39
- st.header("Editions")
40
- st.title(editions)
41
- with col2:
42
- st.header("cities")
43
- st.title(cities)
44
- with col3:
45
- st.header("sports")
46
- st.title(sports)
47
-
48
- col1, col2, col3 = st.columns(3)
49
-
50
- with col1:
51
- st.header("events")
52
- st.title(events)
53
- with col2:
54
- st.header("athletes")
55
- st.title(athletes)
56
- with col3:
57
- st.header("nations")
58
- st.title(nations)
59
-
60
- st.subheader("Countries have hosted the olympics.")
61
- n_df = df.drop_duplicates(subset="Year")[["Year","City"]]
62
- fig = px.bar(n_df, x='City', y='Year',text_auto = True,)
63
- fig.update_xaxes(tickangle=45)
64
- st.plotly_chart(fig)
65
-
66
- countries_over_time = important.data_over_time(df,"region")
67
- fig = px.line(countries_over_time, x="Year", y="region", title = 'Participating Countries over the years')
68
- st.plotly_chart(fig)
69
-
70
- events_over_time = important.data_over_time(df, "Event")
71
- fig = px.line(events_over_time, x="Year", y="Event", title = 'Events over the years')
72
- st.plotly_chart(fig)
73
-
74
- athlete_over_time = important.data_over_time(df, "Name")
75
- fig = px.line(athlete_over_time, x="Year", y="Name", title = 'Athletes over the years')
76
- st.plotly_chart(fig)
77
-
78
- st.title("No. of Events over time(Every Sport)")
79
- fig, ax = plt.subplots(figsize=(20,20))
80
- x = df.drop_duplicates(["Year", "Sport", "Event"])
81
- ax = sns.heatmap(x.pivot_table(index="Sport", columns="Year", values="Event", aggfunc="count").fillna(0).astype("int"),annot=True)
82
- st.pyplot(fig)
83
-
84
- sport_list = df["Sport"].unique().tolist()
85
- sport_list.sort()
86
- sport_list.insert(0,"Overall")
87
-
88
- selected_sport = st.selectbox("Select a Sport",sport_list)
89
- st.title("Most successful Athletes")
90
- x = important.most_successful(df,selected_sport)
91
- st.table(x)
92
-
93
- st.title("Most popular sports of Olympics")
94
- sport_df = df["Sport"].value_counts().reset_index()
95
- fig = px.pie(sport_df, values='count', names='Sport')
96
- fig.update_layout(autosize=False, width=850,height=700)
97
- st.plotly_chart(fig)
98
-
99
- st.title("Locations of Stadium of countries where olympics held.")
100
- data1 = pd.read_csv("lat_long.csv")
101
- st.map(data1)
102
-
103
-
104
- if user_menu == "Medal tally":
105
-
106
- st.sidebar.header("Medal tally")
107
- years, country = important.country_year_list(df)
108
-
109
- selected_year = st.sidebar.selectbox("Select year", years)
110
- selected_country = st.sidebar.selectbox("Select country", country)
111
-
112
- medal_tally = important.fetch_medal_tally(df,selected_year,selected_country)
113
-
114
- if selected_year == "Overall" and selected_country == "Overall":
115
- st.title("Overall Medal Tally")
116
- if selected_year != "Overall" and selected_country == "Overall":
117
- st.title("Medal Tally in " + str(selected_year) + " Olympics")
118
- if selected_year == "Overall" and selected_country != "Overall":
119
- st.title(selected_country + " overall performance in Olympics" )
120
- if selected_year != "Overall" and selected_country != "Overall":
121
- st.title(selected_country + "'s performance in " + str(selected_year) + " Olympics")
122
- st.table(medal_tally)
123
-
124
-
125
- if user_menu == "Country-wise Analysis":
126
-
127
- st.title("Country-wise Analysis")
128
-
129
- country = df["region"].dropna().unique().tolist()
130
- country.sort()
131
-
132
- selected_country = st.sidebar.selectbox("Select a country", country)
133
- new_region = important.country_wise_medal_tally(df, selected_country)
134
-
135
- fig = px.line(new_region, x = "Year", y="Medal")
136
- st.subheader(selected_country + "'s Medal Tally over the years")
137
- st.plotly_chart(fig)
138
-
139
- pt = important.country_event_heatmap(df,selected_country)
140
- st.subheader(selected_country+" excels in the following sports")
141
- fig, ax = plt.subplots(figsize=(20,20))
142
- ax = sns.heatmap(pt, annot=True)
143
- st.pyplot(fig)
144
-
145
- athlete = important.most_successful_athletes(df, selected_country)
146
- st.subheader("Top 10 athletes of "+ selected_country)
147
- st.table(athlete)
148
-
149
- if user_menu == "Athlete-wise Analysis":
150
-
151
- athlete_df = df.drop_duplicates(subset=["Name", "region"])
152
- x1 = athlete_df["Age"].dropna()
153
- x2 = athlete_df[athlete_df["Medal"] == "Gold"]["Age"].dropna()
154
- x3 = athlete_df[athlete_df["Medal"] == "Silver"]["Age"].dropna()
155
- x4 = athlete_df[athlete_df["Medal"] == "Bronze"]["Age"].dropna()
156
-
157
- st.title("Distribution of Age.")
158
- fig = ff.create_distplot([x1,x2,x3,x4], ["Age Distribution","Gold Medalist","Silver Medalist","Bronze Medalist"], show_hist=False, show_rug=False)
159
- fig.update_layout(autosize=False, width=850,height=530)
160
- st.plotly_chart(fig)
161
-
162
- st.title("Distribution of Age wrt sports(Gold Medalist)")
163
- famous_sports =['Basketball','Judo', 'Football','Tug-Of-War','Athletics','Swimming','Badminton','Sailing','Gymnastics','Art Competitions',
164
- 'Handball','Weightlifting','Wrestling','Water Polo','Hockey','Rowing','Fencing','Shooting','Boxing','Taekwondo',
165
- 'Cycling', 'Diving', 'Canoeing', 'Tennis', 'Modern Pentathlon', 'Golf', 'Softball', 'Archery', 'Volleyball','Synchronized Swimming',
166
- 'Table Tennis', 'Baseball','Rhythmic Gymnastics','Rugby Sevens', 'Beach Volleyball', 'Triathlon', 'Rugby', 'Lacrosse', 'Polo', 'Cricket',
167
- 'Ice Hockey','Motorboating']
168
- x = []
169
- name = []
170
- for sport in famous_sports:
171
- temp_df = athlete_df[athlete_df["Sport"] == sport]
172
- x.append(temp_df[temp_df["Medal"]=="Gold"]["Age"].dropna())
173
- name.append(sport)
174
-
175
- fig1 = ff.create_distplot(x,name,show_hist=False, show_rug=False)
176
- fig1.update_layout(autosize=False, width=850,height=530)
177
- st.plotly_chart(fig1)
178
-
179
-
180
- st.title("Height vs Weight")
181
- sport_list = df["Sport"].unique().tolist()
182
- sport_list.sort()
183
- sport_list.insert(0,"Overall")
184
-
185
- selected_sport = st.selectbox("Select a Sport",sport_list)
186
- new_df = important.weight_v_height(df,selected_sport)
187
-
188
- fig, ax = plt.subplots(figsize=(10,10))
189
- ax = sns.scatterplot(new_df, x ="Weight",y = "Height", hue=new_df["Medal"],style=new_df["Sex"],s=100)
190
- st.pyplot(fig)
191
-
192
- st.title("Men vs Women participation over the years")
193
- final_df = important.men_vs_women(df)
194
- fig = px.line(final_df, x="Year", y=["Male","Female"])
195
- st.plotly_chart(fig)
196
-
197
-
198
-
199
-
200
-
201
-
202
-
203
-
204
-
205
-
206
-
207
-
208
-
209
-
210
-
211
-
212
-
213
-
214
-
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import functions
4
+ import important
5
+ import plotly.express as px
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+ import plotly.figure_factory as ff
9
+ from streamlit_option_menu import option_menu
10
+
11
+
12
+ df = pd.read_csv("athlete_events.csv")
13
+ region_df = pd.read_csv("noc_regions.csv")
14
+
15
+ df = functions.preprocess1(df, region_df)
16
+
17
+
18
+ #st.title("Olympic Data Analysis.")
19
+
20
+ st.sidebar.title("Olympic Data Analysis.")
21
+ st.sidebar.image("Olympic_image.jpg")
22
+
23
+ with st.sidebar:
24
+ user_menu = option_menu("Select an option", ["Overview", "Overall Analysis", "Medal tally", "Country-wise Analysis", "Athlete-wise Analysis"],
25
+ icons = ["slack", "bar-chart-line-fill", "award-fill", "graph-up-arrow", "person-walking"],
26
+ menu_icon="bag-fill", default_index=0)
27
+
28
+ if user_menu == "Overview":
29
+ st.image("Analysis_image.png")
30
+
31
+ if user_menu == "Overall Analysis":
32
+ editions = df["Year"].unique().shape[0]-1
33
+ cities = df["City"].unique().shape[0]
34
+ sports = df["Sport"].unique().shape[0]
35
+ events = df["Event"].unique().shape[0]
36
+ athletes = df["Name"].unique().shape[0]
37
+ nations = df["region"].unique().shape[0]
38
+
39
+ st.title("Top Statistics")
40
+
41
+ col1, col2, col3 = st.columns(3)
42
+
43
+ with col1:
44
+ st.header("Editions")
45
+ st.title(editions)
46
+ with col2:
47
+ st.header("cities")
48
+ st.title(cities)
49
+ with col3:
50
+ st.header("sports")
51
+ st.title(sports)
52
+
53
+ col1, col2, col3 = st.columns(3)
54
+
55
+ with col1:
56
+ st.header("events")
57
+ st.title(events)
58
+ with col2:
59
+ st.header("athletes")
60
+ st.title(athletes)
61
+ with col3:
62
+ st.header("nations")
63
+ st.title(nations)
64
+
65
+ st.subheader("Countries have hosted the olympics.")
66
+ n_df = df.drop_duplicates(subset="Year")[["Year","City"]]
67
+ fig = px.bar(n_df, x='City', y='Year',text_auto = True,)
68
+ fig.update_xaxes(tickangle=45)
69
+ st.plotly_chart(fig)
70
+
71
+ countries_over_time = important.data_over_time(df,"region")
72
+ fig = px.line(countries_over_time, x="Year", y="region", title = 'Participating Countries over the years')
73
+ st.plotly_chart(fig)
74
+
75
+ events_over_time = important.data_over_time(df, "Event")
76
+ fig = px.line(events_over_time, x="Year", y="Event", title = 'Events over the years')
77
+ st.plotly_chart(fig)
78
+
79
+ athlete_over_time = important.data_over_time(df, "Name")
80
+ fig = px.line(athlete_over_time, x="Year", y="Name", title = 'Athletes over the years')
81
+ st.plotly_chart(fig)
82
+
83
+ st.title("No. of Events over time(Every Sport)")
84
+ fig, ax = plt.subplots(figsize=(20,20))
85
+ x = df.drop_duplicates(["Year", "Sport", "Event"])
86
+ ax = sns.heatmap(x.pivot_table(index="Sport", columns="Year", values="Event", aggfunc="count").fillna(0).astype("int"),annot=True)
87
+ st.pyplot(fig)
88
+
89
+ sport_list = df["Sport"].unique().tolist()
90
+ sport_list.sort()
91
+ sport_list.insert(0,"Overall")
92
+
93
+ selected_sport = st.selectbox("Select a Sport",sport_list)
94
+ st.title("Most successful Athletes")
95
+ x = important.most_successful(df,selected_sport)
96
+ st.table(x)
97
+
98
+ st.title("Most popular sports of Olympics")
99
+ sport_df = df["Sport"].value_counts().reset_index()
100
+ fig = px.pie(sport_df, values='count', names='Sport')
101
+ fig.update_layout(autosize=False, width=850,height=700)
102
+ st.plotly_chart(fig)
103
+
104
+ st.title("Locations of Stadium of countries where olympics held.")
105
+ data1 = pd.read_csv("lat_long.csv")
106
+ st.map(data1)
107
+
108
+
109
+ if user_menu == "Medal tally":
110
+
111
+ st.sidebar.header("Medal tally")
112
+ years, country = important.country_year_list(df)
113
+
114
+ selected_year = st.sidebar.selectbox("Select year", years)
115
+ selected_country = st.sidebar.selectbox("Select country", country)
116
+
117
+ medal_tally = important.fetch_medal_tally(df,selected_year,selected_country)
118
+
119
+ if selected_year == "Overall" and selected_country == "Overall":
120
+ st.title("Overall Medal Tally")
121
+ if selected_year != "Overall" and selected_country == "Overall":
122
+ st.title("Medal Tally in " + str(selected_year) + " Olympics")
123
+ if selected_year == "Overall" and selected_country != "Overall":
124
+ st.title(selected_country + " overall performance in Olympics" )
125
+ if selected_year != "Overall" and selected_country != "Overall":
126
+ st.title(selected_country + "'s performance in " + str(selected_year) + " Olympics")
127
+ st.table(medal_tally)
128
+
129
+
130
+ if user_menu == "Country-wise Analysis":
131
+
132
+ st.title("Country-wise Analysis")
133
+
134
+ country = df["region"].dropna().unique().tolist()
135
+ country.sort()
136
+
137
+ selected_country = st.sidebar.selectbox("Select a country", country)
138
+ new_region = important.country_wise_medal_tally(df, selected_country)
139
+
140
+ fig = px.line(new_region, x = "Year", y="Medal")
141
+ st.subheader(selected_country + "'s Medal Tally over the years")
142
+ st.plotly_chart(fig)
143
+
144
+ pt = important.country_event_heatmap(df,selected_country)
145
+ st.subheader(selected_country+" excels in the following sports")
146
+ fig, ax = plt.subplots(figsize=(20,20))
147
+ ax = sns.heatmap(pt, annot=True)
148
+ st.pyplot(fig)
149
+
150
+ athlete = important.most_successful_athletes(df, selected_country)
151
+ st.subheader("Top 10 athletes of "+ selected_country)
152
+ st.table(athlete)
153
+
154
+ if user_menu == "Athlete-wise Analysis":
155
+
156
+ athlete_df = df.drop_duplicates(subset=["Name", "region"])
157
+ x1 = athlete_df["Age"].dropna()
158
+ x2 = athlete_df[athlete_df["Medal"] == "Gold"]["Age"].dropna()
159
+ x3 = athlete_df[athlete_df["Medal"] == "Silver"]["Age"].dropna()
160
+ x4 = athlete_df[athlete_df["Medal"] == "Bronze"]["Age"].dropna()
161
+
162
+ st.title("Distribution of Age.")
163
+ fig = ff.create_distplot([x1,x2,x3,x4], ["Age Distribution","Gold Medalist","Silver Medalist","Bronze Medalist"], show_hist=False, show_rug=False)
164
+ fig.update_layout(autosize=False, width=850,height=530)
165
+ st.plotly_chart(fig)
166
+
167
+ st.title("Distribution of Age wrt sports(Gold Medalist)")
168
+ famous_sports =['Basketball','Judo', 'Football','Tug-Of-War','Athletics','Swimming','Badminton','Sailing','Gymnastics','Art Competitions',
169
+ 'Handball','Weightlifting','Wrestling','Water Polo','Hockey','Rowing','Fencing','Shooting','Boxing','Taekwondo',
170
+ 'Cycling', 'Diving', 'Canoeing', 'Tennis', 'Modern Pentathlon', 'Golf', 'Softball', 'Archery', 'Volleyball','Synchronized Swimming',
171
+ 'Table Tennis', 'Baseball','Rhythmic Gymnastics','Rugby Sevens', 'Beach Volleyball', 'Triathlon', 'Rugby', 'Lacrosse', 'Polo', 'Cricket',
172
+ 'Ice Hockey','Motorboating']
173
+ x = []
174
+ name = []
175
+ for sport in famous_sports:
176
+ temp_df = athlete_df[athlete_df["Sport"] == sport]
177
+ x.append(temp_df[temp_df["Medal"]=="Gold"]["Age"].dropna())
178
+ name.append(sport)
179
+
180
+ fig1 = ff.create_distplot(x,name,show_hist=False, show_rug=False)
181
+ fig1.update_layout(autosize=False, width=850,height=530)
182
+ st.plotly_chart(fig1)
183
+
184
+
185
+ st.title("Height vs Weight")
186
+ sport_list = df["Sport"].unique().tolist()
187
+ sport_list.sort()
188
+ sport_list.insert(0,"Overall")
189
+
190
+ selected_sport = st.selectbox("Select a Sport",sport_list)
191
+ new_df = important.weight_v_height(df,selected_sport)
192
+
193
+ fig, ax = plt.subplots(figsize=(10,10))
194
+ ax = sns.scatterplot(new_df, x ="Weight",y = "Height", hue=new_df["Medal"],style=new_df["Sex"],s=100)
195
+ st.pyplot(fig)
196
+
197
+ st.title("Men vs Women participation over the years")
198
+ final_df = important.men_vs_women(df)
199
+ fig = px.line(final_df, x="Year", y=["Male","Female"])
200
+ st.plotly_chart(fig)
201
+
202
+
203
+
204
+
205
+
206
+
207
+
208
+
209
+
210
+
211
+
212
+
213
+
214
+
215
+
216
+
217
+
218
+
219
+