Spaces:
Sleeping
Sleeping
olympic_eda
Browse files- .gitattributes +2 -0
- Analysis_image.png +3 -0
- Olympic_image.jpg +0 -0
- README.md +5 -13
- athlete_events.csv +3 -0
- functions.py +25 -0
- important.py +103 -0
- lat_long.csv +24 -0
- noc_regions.csv +1 -0
- olympic_app.py +214 -0
- olympics-data-analysis-web-app +2 -0
- requirements.txt +8 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
Analysis_image.png filter=lfs diff=lfs merge=lfs -text
|
37 |
+
athlete_events.csv filter=lfs diff=lfs merge=lfs -text
|
Analysis_image.png
ADDED
![]() |
Git LFS Details
|
Olympic_image.jpg
ADDED
![]() |
README.md
CHANGED
@@ -1,13 +1,5 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
sdk: streamlit
|
7 |
-
sdk_version: 1.36.0
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
license: mit
|
11 |
-
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
# olympics-data-analysis-web-app
|
2 |
+
|
3 |
+
A Streamlit web application for exploring and analysing Olympic Games data.
|
4 |
+
|
5 |
+
Live web app: https://olympicanalysiss.streamlit.app/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
athlete_events.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1751e89b509dd8c0fa0979ee258eef4374aa93f17d64cd07383ad3f57c5aabd7
|
3 |
+
size 41500688
|
functions.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# Module-level copies of the raw CSVs, read as a side effect of importing this
# module (paths are relative to the current working directory).
# NOTE(review): olympic_app.py reads the same two files itself and passes them
# to preprocess1, so these loads look redundant — confirm nothing relies on
# functions.df / functions.region_df before removing them.
df = pd.read_csv("athlete_events.csv")
region_df = pd.read_csv("noc_regions.csv")
|
5 |
+
|
6 |
+
def preprocess1(df, region_df):
    """Build the Summer-only analysis frame used by the dashboard.

    Args:
        df: Athlete-events frame; must contain "Season", "NOC" and "Medal".
        region_df: NOC lookup frame; must contain "NOC".

    Returns:
        A new DataFrame holding only the rows whose "Season" is "Summer",
        left-joined with ``region_df`` on "NOC", with fully duplicated rows
        removed and one integer 0/1 indicator column per medal value
        appended. "Gold", "Silver" and "Bronze" are always present, even when
        a medal type does not occur in the data. The inputs are not modified.
    """
    summer = df[df["Season"] == "Summer"]

    # Left join so athletes whose NOC has no region entry are kept.
    merged = summer.merge(region_df, on="NOC", how="left").drop_duplicates()

    # One-hot encode the medal column (rows without a medal get all zeros).
    medals = pd.get_dummies(merged["Medal"], dtype=int)

    # get_dummies only creates columns for values that actually occur; the
    # tally functions index all three, so add any missing one as zeros.
    for medal in ("Gold", "Silver", "Bronze"):
        if medal not in medals.columns:
            medals[medal] = 0

    return pd.concat([merged, medals], axis=1)
|
25 |
+
|
important.py
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def medal_tally(df):
    """Return the all-time medal table per region.

    Args:
        df: Preprocessed frame with the identifying columns "Team", "NOC",
            "Games", "Year", "City", "Sport", "Event", "Medal", a "region"
            column and the 0/1 indicator columns "Gold", "Silver", "Bronze".

    Returns:
        DataFrame with columns "region", "Gold", "Silver", "Bronze", "Total",
        sorted by "Gold" descending. Rows without a region are left out by
        the groupby.
    """
    medal_columns = ["Gold", "Silver", "Bronze"]

    # A team medal appears once per team member; keep one row per medal.
    unique_medals = df.drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )

    # Select the medal columns before summing so string columns are never
    # aggregated.
    tally = (
        unique_medals.groupby("region")[medal_columns]
        .sum()
        .sort_values("Gold", ascending=False)
        .reset_index()
    )
    tally["Total"] = tally["Gold"] + tally["Silver"] + tally["Bronze"]
    return tally
|
7 |
+
|
8 |
+
def country_year_list(df):
    """Build the option lists for the year and country select boxes.

    Args:
        df: Frame with "Year" and "region" columns.

    Returns:
        ``(years, countries)``: each list starts with the string "Overall"
        followed by the sorted distinct values of the column; missing regions
        are dropped before sorting.
    """
    year_options = ["Overall", *sorted(df["Year"].unique().tolist())]
    region_options = ["Overall", *sorted(df["region"].dropna().unique().tolist())]
    return year_options, region_options
|
18 |
+
|
19 |
+
def fetch_medal_tally(df, year, country):
    """Return the medal table for a year/country selection.

    Args:
        df: Preprocessed frame with the identifying columns "Team", "NOC",
            "Games", "Year", "City", "Sport", "Event", "Medal", a "region"
            column and the indicator columns "Gold", "Silver", "Bronze".
        year: "Overall" for every edition, otherwise a year as an int or a
            numeric string.
        country: "Overall" for every region, otherwise a region name.

    Returns:
        DataFrame with "Gold", "Silver", "Bronze" and "Total". When a single
        country is selected across all years, rows are keyed by "Year" in
        ascending order; in every other case rows are keyed by "region" and
        sorted by "Gold" descending.
    """
    medal_columns = ["Gold", "Silver", "Bronze"]

    # A team medal appears once per team member; keep one row per medal.
    selection = df.drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )

    # Apply each filter independently. The year is always normalised to int so
    # that a string year behaves the same with or without a country filter.
    if year != "Overall":
        selection = selection[selection["Year"] == int(year)]
    if country != "Overall":
        selection = selection[selection["region"] == country]

    if year == "Overall" and country != "Overall":
        # One country over time: one row per edition.
        tally = selection.groupby("Year")[medal_columns].sum().sort_index().reset_index()
    else:
        tally = (
            selection.groupby("region")[medal_columns]
            .sum()
            .sort_values("Gold", ascending=False)
            .reset_index()
        )

    tally["Total"] = tally["Gold"] + tally["Silver"] + tally["Bronze"]
    return tally
|
39 |
+
|
40 |
+
def data_over_time(df, col):
    """Count the distinct values of ``col`` seen in each year.

    Args:
        df: Frame with a "Year" column and the column named by ``col``.
        col: Name of the column whose distinct values are counted per year.

    Returns:
        DataFrame with columns "Year" and ``col`` (the per-year count), sorted
        by "Year" ascending with a fresh 0..n-1 index.

    The count is derived with ``groupby(...).size()`` rather than
    ``value_counts().reset_index()``, whose column names differ between pandas
    1.x and 2.x.
    """
    # NOTE(review): drop_duplicates treats a missing value in ``col`` as one
    # more distinct value, so a year containing rows with a missing ``col`` is
    # counted one higher — confirm whether that is intended for "region".
    distinct_pairs = df.drop_duplicates(["Year", col])
    return distinct_pairs.groupby("Year").size().reset_index(name=col)
|
45 |
+
|
46 |
+
|
47 |
+
def most_successful(df, sport):
    """Return the 15 athletes with the most medal rows.

    Args:
        df: Frame with "Name", "Medal", "Sport" and "region" columns.
        sport: "Overall" to consider every sport, otherwise a sport name used
            to filter the medal rows.

    Returns:
        DataFrame with columns "Name", "Medals", "Sport", "region", ordered by
        medal count descending, with a fresh 0..n-1 index. "Sport" and
        "region" come from the athlete's first row in ``df``.
    """
    medalists = df.dropna(subset=["Medal"])
    if sport != "Overall":
        medalists = medalists[medalists["Sport"] == sport]

    # Name the index and the counts explicitly; the default names produced by
    # value_counts().reset_index() differ between pandas 1.x and 2.x.
    top = (
        medalists["Name"]
        .value_counts()
        .head(15)
        .rename_axis("Name")
        .reset_index(name="Medals")
    )

    # One profile row per athlete, so the join cannot multiply rows.
    profiles = df.drop_duplicates("Name")[["Name", "Sport", "region"]]
    return top.merge(profiles, on="Name", how="left")
|
56 |
+
|
57 |
+
def country_wise_medal_tally(df, region):
    """Count the medals won by one region in each year.

    Args:
        df: Frame with the identifying columns "Team", "NOC", "Games", "Year",
            "City", "Sport", "Event", "Medal" and a "region" column.
        region: Region name to filter on.

    Returns:
        DataFrame with columns "Year" and "Medal", where "Medal" is the number
        of medals in that year after collapsing team medals to one row each.
        ``df`` is not modified.
    """
    # Keep medal rows only, then one row per medal rather than per team member.
    medals = df.dropna(subset=["Medal"]).drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )
    regional = medals[medals["region"] == region]
    return regional.groupby("Year")["Medal"].count().reset_index()
|
65 |
+
|
66 |
+
def country_event_heatmap(df, country):
    """Build a Sport x Year table of medal counts for one region.

    Args:
        df: Frame with the identifying columns "Team", "NOC", "Games", "Year",
            "City", "Sport", "Event", "Medal" and a "region" column.
        country: Region name to filter on.

    Returns:
        Integer DataFrame indexed by "Sport" with one column per "Year";
        combinations without a medal are 0. The result is empty when the
        region has no medal rows.
    """
    # Collapse team medals to one row each, as the other tally functions in
    # this module do; otherwise a team event counts once per team member.
    medals = df.dropna(subset=["Medal"]).drop_duplicates(
        subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]
    )
    regional = medals[medals["region"] == country]
    table = regional.pivot_table(
        index="Sport", columns="Year", values="Medal", aggfunc="count"
    )
    return table.fillna(0).astype("int")
|
73 |
+
|
74 |
+
def most_successful_athletes(df, country):
    """Return the 10 athletes of one region with the most medal rows.

    Args:
        df: Frame with "Name", "Medal", "Sport" and "region" columns.
        country: Region name used to filter the medal rows.

    Returns:
        DataFrame with columns "Name", "Medals", "Sport", ordered by medal
        count descending, with a fresh 0..n-1 index. "Sport" comes from the
        athlete's first row in ``df``.
    """
    medalists = df.dropna(subset=["Medal"])
    medalists = medalists[medalists["region"] == country]

    # Name the index and the counts explicitly; the default names produced by
    # value_counts().reset_index() differ between pandas 1.x and 2.x.
    top = (
        medalists["Name"]
        .value_counts()
        .head(10)
        .rename_axis("Name")
        .reset_index(name="Medals")
    )

    # One profile row per athlete, so the join cannot multiply rows.
    profiles = df.drop_duplicates("Name")[["Name", "Sport"]]
    return top.merge(profiles, on="Name", how="left")
|
82 |
+
|
83 |
+
def weight_v_height(df, sport):
    """Return one row per athlete, with missing medals labelled "No Medal".

    Args:
        df: Frame with "Name", "region", "Sport" and "Medal" columns.
        sport: "Overall" to keep every sport, otherwise a sport name to
            filter on.

    Returns:
        A new DataFrame with the first row of each ("Name", "region") pair,
        missing "Medal" values replaced by "No Medal", optionally restricted
        to ``sport``. ``df`` is not modified.
    """
    athlete_df = df.drop_duplicates(subset=["Name", "region"])

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on a column selection: that chained form emits warnings and does
    # nothing at all when pandas copy-on-write is active.
    athlete_df = athlete_df.assign(Medal=athlete_df["Medal"].fillna("No Medal"))

    if sport != "Overall":
        return athlete_df[athlete_df["Sport"] == sport]
    return athlete_df
|
91 |
+
|
92 |
+
def men_vs_women(df):
    """Count male and female athletes per year.

    Args:
        df: Frame with "Name", "region", "Sex" and "Year" columns.

    Returns:
        DataFrame with columns "Year", "Male", "Female" (integer counts),
        sorted by "Year". A year in which only one sex appears is kept, with
        0 for the other.
    """
    # NOTE(review): de-duplicating on ("Name", "region") keeps only the first
    # row of each athlete, so an athlete is counted in a single year only —
    # confirm this is the intended meaning of "participation over the years".
    athlete_df = df.drop_duplicates(subset=["Name", "region"])

    def _count_by_year(sex, label):
        """Return a Year/<label> frame counting athletes of the given sex."""
        subset = athlete_df[athlete_df["Sex"] == sex]
        return subset.groupby("Year")["Name"].count().rename(label).reset_index()

    men = _count_by_year("M", "Male")
    women = _count_by_year("F", "Female")

    # Outer join so a year with women but no men is not dropped.
    final = (
        men.merge(women, on="Year", how="outer")
        .sort_values("Year")
        .reset_index(drop=True)
    )

    # The join introduces NaN (and therefore floats) for missing years; turn
    # them back into integer counts.
    return final.assign(
        Male=final["Male"].fillna(0).astype(int),
        Female=final["Female"].fillna(0).astype(int),
    )
|
100 |
+
|
101 |
+
|
102 |
+
|
103 |
+
|
lat_long.csv
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
City,latitude,longitude
|
2 |
+
Athina,38.0359,23.787
|
3 |
+
Paris,48.841465,2.252616
|
4 |
+
St. Louis,38.6479,-90.3138
|
5 |
+
London,51.5387,-0.0172
|
6 |
+
Stockholm,59.3453,18.079
|
7 |
+
Antwerpen,51.1841,4.3825
|
8 |
+
Amsterdam,52.3434,4.8533
|
9 |
+
Los Angeles,34.073814,-118.240784
|
10 |
+
Berlin,52.5147,13.2395
|
11 |
+
Helsinki,60.1872,24.9268
|
12 |
+
Melbourne,-37.8209,144.9783
|
13 |
+
Rome,41.933964,12.454297
|
14 |
+
Tokyo,35.6778,139.7145
|
15 |
+
Mexico City,19.3319,-99.1922
|
16 |
+
Munich,48.1732,11.5466
|
17 |
+
Montreal,45.5577,-73.5515
|
18 |
+
Moskva,55.7812,37.6261
|
19 |
+
Seoul,37.5158,127.0728
|
20 |
+
Barcelona,41.3647,2.1557
|
21 |
+
Atlanta,33.76,-84.3932
|
22 |
+
Sydney,-33.8354,151.0654
|
23 |
+
Beijing,39.9929,116.3965
|
24 |
+
Rio de Janeiro,-22.8933,-43.2923
|
noc_regions.csv
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
NOC,region,notes
|
olympic_app.py
ADDED
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import functions
|
4 |
+
import important
|
5 |
+
import plotly.express as px
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import seaborn as sns
|
8 |
+
import plotly.figure_factory as ff
|
9 |
+
|
10 |
+
|
11 |
+
@st.cache_data
def _load_olympic_data():
    """Read both CSV files and return ``(preprocessed_events, regions)``.

    Streamlit re-executes this script on every widget interaction; caching
    the result avoids re-reading and re-preprocessing the athlete CSV each
    time. The cached value is reused until the app restarts or this function
    changes, so edits to the CSV files on disk are not picked up
    automatically.
    """
    raw_events = pd.read_csv("athlete_events.csv")
    regions = pd.read_csv("noc_regions.csv")
    return functions.preprocess1(raw_events, regions), regions


# df is the preprocessed frame that every section below works on.
df, region_df = _load_olympic_data()

# Sidebar: title, banner image and the section selector driving the page.
st.sidebar.title("Olympic Data Analysis.")
st.sidebar.image("Olympic_image.jpg")
user_menu = st.sidebar.radio(
    "Select an option",
    ("Overview", "Overall Analysis", "Medal tally", "Country-wise Analysis", "Athlete-wise Analysis"),
)
|
22 |
+
|
23 |
+
# "Overview" section: shows the static overview image and nothing else.
if user_menu == "Overview":
    st.image("Analysis_image.png")
|
25 |
+
|
26 |
+
# "Overall Analysis" section: headline counts, trends over time, an
# events-per-sport heatmap, the top athletes table, a sport popularity pie
# chart and a map of the venue coordinates from lat_long.csv.
if user_menu == "Overall Analysis":
    # NOTE(review): the "- 1" presumably excludes one edition from the count
    # (e.g. a non-official Games year) — confirm against the data.
    editions = df["Year"].unique().shape[0] - 1
    cities = df["City"].unique().shape[0]
    sports = df["Sport"].unique().shape[0]
    events = df["Event"].unique().shape[0]
    athletes = df["Name"].unique().shape[0]
    # nunique() ignores missing regions; unique() would count NaN as a nation.
    nations = df["region"].nunique()

    st.title("Top Statistics")

    # Two rows of three headline figures; labels are shown exactly as written.
    headline_rows = (
        (("Editions", editions), ("cities", cities), ("sports", sports)),
        (("events", events), ("athletes", athletes), ("nations", nations)),
    )
    for row in headline_rows:
        for column, (label, value) in zip(st.columns(3), row):
            with column:
                st.header(label)
                st.title(value)

    st.subheader("Countries have hosted the olympics.")
    n_df = df.drop_duplicates(subset="Year")[["Year", "City"]]
    fig = px.bar(n_df, x='City', y='Year', text_auto=True)
    fig.update_xaxes(tickangle=45)
    st.plotly_chart(fig)

    # One line chart per tracked column: distinct values seen per year.
    trend_charts = (
        ("region", 'Participating Countries over the years'),
        ("Event", 'Events over the years'),
        ("Name", 'Athletes over the years'),
    )
    for column_name, chart_title in trend_charts:
        trend = important.data_over_time(df, column_name)
        fig = px.line(trend, x="Year", y=column_name, title=chart_title)
        st.plotly_chart(fig)

    st.title("No. of Events over time(Every Sport)")
    fig, ax = plt.subplots(figsize=(20, 20))
    x = df.drop_duplicates(["Year", "Sport", "Event"])
    events_per_sport = x.pivot_table(
        index="Sport", columns="Year", values="Event", aggfunc="count"
    ).fillna(0).astype("int")
    sns.heatmap(events_per_sport, annot=True, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; this script reruns
    # on each interaction, so release the figure once it has been rendered.
    plt.close(fig)

    sport_list = df["Sport"].unique().tolist()
    sport_list.sort()
    sport_list.insert(0, "Overall")

    selected_sport = st.selectbox("Select a Sport", sport_list)
    st.title("Most successful Athletes")
    x = important.most_successful(df, selected_sport)
    st.table(x)

    st.title("Most popular sports of Olympics")
    # Name both columns explicitly; the defaults produced by
    # value_counts().reset_index() differ between pandas 1.x and 2.x.
    sport_df = df["Sport"].value_counts().rename_axis("Sport").reset_index(name="count")
    fig = px.pie(sport_df, values='count', names='Sport')
    fig.update_layout(autosize=False, width=850, height=700)
    st.plotly_chart(fig)

    st.title("Locations of Stadium of countries where olympics held.")
    data1 = pd.read_csv("lat_long.csv")
    st.map(data1)
|
102 |
+
|
103 |
+
|
104 |
+
# "Medal tally" section: sidebar filters for year and country, a heading that
# reflects the chosen combination, and the resulting medal table.
if user_menu == "Medal tally":
    st.sidebar.header("Medal tally")

    years, country = important.country_year_list(df)
    selected_year = st.sidebar.selectbox("Select year", years)
    selected_country = st.sidebar.selectbox("Select country", country)

    medal_tally = important.fetch_medal_tally(df, selected_year, selected_country)

    # Exactly one of the four year/country combinations applies.
    every_year = selected_year == "Overall"
    every_country = selected_country == "Overall"
    if every_year and every_country:
        heading = "Overall Medal Tally"
    elif every_country:
        heading = "Medal Tally in " + str(selected_year) + " Olympics"
    elif every_year:
        heading = selected_country + " overall performance in Olympics"
    else:
        heading = selected_country + "'s performance in " + str(selected_year) + " Olympics"

    st.title(heading)
    st.table(medal_tally)
|
123 |
+
|
124 |
+
|
125 |
+
# "Country-wise Analysis" section: medals per year, a sport/year medal
# heatmap and the top athletes for the country picked in the sidebar.
if user_menu == "Country-wise Analysis":

    st.title("Country-wise Analysis")

    country = df["region"].dropna().unique().tolist()
    country.sort()

    selected_country = st.sidebar.selectbox("Select a country", country)
    new_region = important.country_wise_medal_tally(df, selected_country)

    fig = px.line(new_region, x="Year", y="Medal")
    st.subheader(selected_country + "'s Medal Tally over the years")
    st.plotly_chart(fig)

    pt = important.country_event_heatmap(df, selected_country)
    st.subheader(selected_country + " excels in the following sports")
    if pt.empty:
        # The list offers every region, including ones without any medal;
        # seaborn cannot draw a heatmap from an empty table.
        st.info("No medals recorded for " + selected_country + ", so there is no heatmap to show.")
    else:
        fig, ax = plt.subplots(figsize=(20, 20))
        sns.heatmap(pt, annot=True, ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed; this script
        # reruns on each interaction, so release it once rendered.
        plt.close(fig)

    athlete = important.most_successful_athletes(df, selected_country)
    st.subheader("Top 10 athletes of " + selected_country)
    st.table(athlete)
|
148 |
+
|
149 |
+
# "Athlete-wise Analysis" section: age distributions (overall and per medal),
# gold-medalist age distributions per sport, a height/weight scatter plot and
# the men-vs-women participation chart.
if user_menu == "Athlete-wise Analysis":

    # One row per athlete (first row of each Name/region pair).
    athlete_df = df.drop_duplicates(subset=["Name", "region"])
    x1 = athlete_df["Age"].dropna()
    x2 = athlete_df[athlete_df["Medal"] == "Gold"]["Age"].dropna()
    x3 = athlete_df[athlete_df["Medal"] == "Silver"]["Age"].dropna()
    x4 = athlete_df[athlete_df["Medal"] == "Bronze"]["Age"].dropna()

    st.title("Distribution of Age.")
    fig = ff.create_distplot(
        [x1, x2, x3, x4],
        ["Age Distribution", "Gold Medalist", "Silver Medalist", "Bronze Medalist"],
        show_hist=False,
        show_rug=False,
    )
    fig.update_layout(autosize=False, width=850, height=530)
    st.plotly_chart(fig)

    st.title("Distribution of Age wrt sports(Gold Medalist)")
    famous_sports = ['Basketball', 'Judo', 'Football', 'Tug-Of-War', 'Athletics', 'Swimming', 'Badminton', 'Sailing', 'Gymnastics', 'Art Competitions',
                     'Handball', 'Weightlifting', 'Wrestling', 'Water Polo', 'Hockey', 'Rowing', 'Fencing', 'Shooting', 'Boxing', 'Taekwondo',
                     'Cycling', 'Diving', 'Canoeing', 'Tennis', 'Modern Pentathlon', 'Golf', 'Softball', 'Archery', 'Volleyball', 'Synchronized Swimming',
                     'Table Tennis', 'Baseball', 'Rhythmic Gymnastics', 'Rugby Sevens', 'Beach Volleyball', 'Triathlon', 'Rugby', 'Lacrosse', 'Polo', 'Cricket',
                     'Ice Hockey', 'Motorboating']
    x = []
    name = []
    for sport in famous_sports:
        temp_df = athlete_df[athlete_df["Sport"] == sport]
        gold_ages = temp_df[temp_df["Medal"] == "Gold"]["Age"].dropna()
        # The density estimate behind create_distplot needs at least two
        # distinct values; skip sports that cannot provide them instead of
        # letting a single sparse sport break the whole chart.
        if gold_ages.nunique() < 2:
            continue
        x.append(gold_ages)
        name.append(sport)

    if x:
        fig1 = ff.create_distplot(x, name, show_hist=False, show_rug=False)
        fig1.update_layout(autosize=False, width=850, height=530)
        st.plotly_chart(fig1)
    else:
        st.info("Not enough gold-medalist age data to draw this chart.")

    st.title("Height vs Weight")
    sport_list = df["Sport"].unique().tolist()
    sport_list.sort()
    sport_list.insert(0, "Overall")

    selected_sport = st.selectbox("Select a Sport", sport_list)
    new_df = important.weight_v_height(df, selected_sport)

    fig, ax = plt.subplots(figsize=(10, 10))
    # Pass the frame by keyword: the first positional parameter of
    # scatterplot is not the same across seaborn versions.
    sns.scatterplot(data=new_df, x="Weight", y="Height", hue="Medal", style="Sex", s=100, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; this script reruns
    # on each interaction, so release it once rendered.
    plt.close(fig)

    st.title("Men vs Women participation over the years")
    final_df = important.men_vs_women(df)
    fig = px.line(final_df, x="Year", y=["Male", "Female"])
    st.plotly_chart(fig)
|
196 |
+
|
197 |
+
|
198 |
+
|
199 |
+
|
200 |
+
|
201 |
+
|
202 |
+
|
203 |
+
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
+
|
208 |
+
|
209 |
+
|
210 |
+
|
211 |
+
|
212 |
+
|
213 |
+
|
214 |
+
|
olympics-data-analysis-web-app
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
A Streamlit web application for exploring and analysing Olympic Games data.
|
2 |
+
Live web app: https://olympicanalysiss.streamlit.app/
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
numpy==1.26.3
|
2 |
+
pandas==2.1.4
|
3 |
+
matplotlib==3.8.2
|
4 |
+
seaborn==0.13.1
|
5 |
+
plotly==5.18.0
|
6 |
+
streamlit==1.29.0
|
7 |
+
gunicorn==19.7.1
|
8 |
+
scipy==1.11.4
|