Spaces:

musk12
/

olympic_eda

Sleeping

App Files Files Community

musk12 committed 23 days ago

Commit

fe14f7a

•

1 Parent(s): 3656e39

olympic_eda

Browse files

Files changed (12) hide show

.gitattributes +2 -0
Analysis_image.png +3 -0
Olympic_image.jpg +0 -0
README.md +5 -13
athlete_events.csv +3 -0
functions.py +25 -0
important.py +103 -0
lat_long.csv +24 -0
noc_regions.csv +1 -0
olympic_app.py +214 -0
olympics-data-analysis-web-app +2 -0
requirements.txt +8 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Analysis_image.png filter=lfs diff=lfs merge=lfs -text
+athlete_events.csv filter=lfs diff=lfs merge=lfs -text

Analysis_image.png ADDED Viewed

Git LFS Details

SHA256: 54a70825253a8bcf63f2feb3701f054e7ffde3a89deb64e179d5b7876047f53e
Pointer size: 132 Bytes
Size of remote file: 8.43 MB

Olympic_image.jpg ADDED Viewed

README.md CHANGED Viewed

@@ -1,13 +1,5 @@
----
-title: Olympic Eda
-emoji: 👀
-colorFrom: indigo
-colorTo: red
-sdk: streamlit
-sdk_version: 1.36.0
-app_file: app.py
-pinned: false
-license: mit
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# olympics-data-analysis-web-app
+A Streamlit web application for the analysis of Olympic Games data.
+Live Web App : https://olympicanalysiss.streamlit.app/

athlete_events.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1751e89b509dd8c0fa0979ee258eef4374aa93f17d64cd07383ad3f57c5aabd7
+size 41500688

functions.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import pandas as pd
+# Raw datasets, read from the working directory as soon as this module is imported.
+# NOTE(review): olympic_app.py reads the same two CSV files itself and passes them
+# to preprocess1(), so these import-time loads look redundant — confirm nothing
+# relies on functions.df / functions.region_df before removing them.
+df = pd.read_csv("athlete_events.csv")
+region_df = pd.read_csv("noc_regions.csv")
+def preprocess1(df, region_df):
+    #filtering for summer olympics
+    df = df[df["Season"] == "Summer"]
+    #merging with noc_regions data
+    df = df.merge(region_df, on="NOC", how="left")
+    #dropping duplicates
+    df.drop_duplicates(inplace=True)
+    #one hot encoding medal column
+    dummy = pd.get_dummies(df["Medal"], dtype=int)
+    #and concat dummy with original data
+    df = pd.concat([df, dummy], axis=1)
+    return df

important.py ADDED Viewed

	@@ -0,0 +1,103 @@

+def medal_tally(df):
+    medal_tally = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
+    medal_tally = medal_tally.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
+    medal_tally["Total"] = medal_tally["Gold"]+medal_tally["Silver"]+medal_tally["Bronze"]
+    return medal_tally
+def country_year_list(df):
+    years = df["Year"].unique().tolist()
+    years.sort()
+    years.insert(0,"Overall")
+    country = df["region"].dropna().unique().tolist()
+    country.sort()
+    country.insert(0,"Overall")
+    return years, country
+def fetch_medal_tally(df,year,country):
+    medal_df = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
+    flag = 0
+    if year == "Overall" and country == "Overall":
+        temp_df = medal_df
+    if year == "Overall" and country != "Overall":
+        flag = 1
+        temp_df = medal_df[medal_df["region"] == country]
+    if year != "Overall" and country == "Overall":
+        temp_df = medal_df[medal_df["Year"] == int(year)]
+    if year != "Overall" and country != "Overall":
+        temp_df = medal_df[(medal_df["Year"] == year) & (medal_df["region"] == country)]
+    if flag == 1:
+        x = temp_df.groupby("Year").sum()[["Gold", "Silver", "Bronze"]].sort_values("Year").reset_index()
+    else:
+        x = temp_df.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
+    x["Total"] = x["Gold"]+x["Silver"]+x["Bronze"]
+    return x
+def data_over_time(df,col):
+    year_count = df.drop_duplicates(["Year", col])["Year"].value_counts().reset_index().sort_values("Year")
+    year_count.rename(columns={"count":col}, inplace = True)
+    return year_count
+def most_successful(df, sport):
+    temp_df = df.dropna(subset=["Medal"])
+    if sport != "Overall":
+        temp_df = temp_df[temp_df["Sport"] == sport]
+    x= temp_df["Name"].value_counts().reset_index().head(15).merge(df,left_on="Name",
+                                                                   right_on="Name",how="left")       [["Name","count","Sport","region"]].drop_duplicates("Name")
+    x.rename(columns={"count":"Medals"}, inplace=True)
+    return x
+def country_wise_medal_tally(df, region):
+    temp_df = df.dropna(subset=["Medal"])
+    temp_df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"], inplace=True)
+    new_df=temp_df[temp_df["region"]==region]
+    new_df.groupby("Year").count()["Medal"].reset_index()
+    final_df = new_df.groupby("Year").count()["Medal"].reset_index()
+    return final_df
+def country_event_heatmap(df,country):
+    temp_df = df.dropna(subset=["Medal"])
+    new_df=temp_df[temp_df["region"]==country]
+    pt = new_df.pivot_table(index="Sport", columns="Year", values="Medal", aggfunc="count").fillna(0).astype("int")
+    return pt
+def most_successful_athletes(df, country):
+    temp_df = df.dropna(subset=["Medal"])
+    temp_df = temp_df[temp_df["region"] == country]
+    x= temp_df["Name"].value_counts().reset_index().head(10).merge(df,left_on="Name", right_on="Name",
+                                                                   how="left")[["Name","count","Sport"]].drop_duplicates("Name")
+    x.rename(columns={"count":"Medals"}, inplace=True)
+    return x
+def weight_v_height(df, sport):
+    athlete_df = df.drop_duplicates(subset=["Name", "region"])
+    athlete_df["Medal"].fillna("No Medal", inplace=True)
+    if sport != "Overall":
+        temp_df = athlete_df[athlete_df["Sport"]==sport]
+        return temp_df
+    else:
+        return athlete_df
+def men_vs_women(df):
+    athlete_df = df.drop_duplicates(subset=["Name", "region"])
+    men = athlete_df[athlete_df["Sex"]=="M"].groupby("Year").count()["Name"].reset_index()
+    women = athlete_df[athlete_df["Sex"]=="F"].groupby("Year").count()["Name"].reset_index()
+    final = men.merge(women, on="Year", how="left")
+    final.rename(columns={"Name_x":"Male", "Name_y":"Female"}, inplace=True)
+    final.fillna(0, inplace=True)
+    return final

lat_long.csv ADDED Viewed

	@@ -0,0 +1,24 @@

+City,latitude,longitude
+Athina,38.0359,23.787
+Paris,48.841465,2.252616
+St. Louis,38.6479,-90.3138
+London,51.5387,-0.0172
+Stockholm,59.3453,18.079
+Antwerpen,51.1841,4.3825
+Amsterdam,52.3434,4.8533
+Los Angeles,34.073814,-118.240784
+Berlin,52.5147,13.2395
+Helsinki,60.1872,24.9268
+Melbourne,-37.8209,144.9783
+Rome,41.933964,12.454297
+Tokyo,35.6778,139.7145
+Mexico City,19.3319,-99.1922
+Munich,48.1732,11.5466
+Montreal,45.5577,-73.5515
+Moskva,55.7812,37.6261
+Seoul,37.5158,127.0728
+Barcelona,41.3647,2.1557
+Atlanta,33.76,-84.3932
+Sydney,-33.8354,151.0654
+Beijing,39.9929,116.3965
+Rio de Janeiro,-22.8933,-43.2923

noc_regions.csv ADDED Viewed

	@@ -0,0 +1 @@


1	+ NOC,region,notes

olympic_app.py ADDED Viewed

	@@ -0,0 +1,214 @@

+import streamlit as st
+import pandas as pd
+import functions
+import important
+import plotly.express as px
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.figure_factory as ff
+df = pd.read_csv("athlete_events.csv")
+region_df = pd.read_csv("noc_regions.csv")
+df = functions.preprocess1(df, region_df)
+#st.title("Olympic Data Analysis.")
+st.sidebar.title("Olympic Data Analysis.")
+st.sidebar.image("Olympic_image.jpg")
+user_menu = st.sidebar.radio("Select an option", ("Overview", "Overall Analysis", "Medal tally", "Country-wise Analysis", "Athlete-wise Analysis"))
+if user_menu == "Overview":
+    st.image("Analysis_image.png")
+if user_menu == "Overall Analysis":
+    editions = df["Year"].unique().shape[0]-1
+    cities = df["City"].unique().shape[0]
+    sports = df["Sport"].unique().shape[0]
+    events = df["Event"].unique().shape[0]
+    athletes = df["Name"].unique().shape[0]
+    nations = df["region"].unique().shape[0]
+    st.title("Top Statistics")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.header("Editions")
+        st.title(editions)
+    with col2:
+        st.header("cities")
+        st.title(cities)
+    with col3:
+        st.header("sports")
+        st.title(sports)
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.header("events")
+        st.title(events)
+    with col2:
+        st.header("athletes")
+        st.title(athletes)
+    with col3:
+        st.header("nations")
+        st.title(nations)
+    st.subheader("Countries have hosted the olympics.")
+    n_df = df.drop_duplicates(subset="Year")[["Year","City"]]
+    fig = px.bar(n_df, x='City', y='Year',text_auto = True,)
+    fig.update_xaxes(tickangle=45)
+    st.plotly_chart(fig)
+    countries_over_time = important.data_over_time(df,"region")
+    fig = px.line(countries_over_time, x="Year", y="region", title = 'Participating Countries over the years')
+    st.plotly_chart(fig)
+    events_over_time = important.data_over_time(df, "Event")
+    fig = px.line(events_over_time, x="Year", y="Event", title = 'Events over the years')
+    st.plotly_chart(fig)
+    athlete_over_time = important.data_over_time(df, "Name")
+    fig = px.line(athlete_over_time, x="Year", y="Name", title = 'Athletes over the years')
+    st.plotly_chart(fig)
+    st.title("No. of Events over time(Every Sport)")
+    fig, ax = plt.subplots(figsize=(20,20))
+    x = df.drop_duplicates(["Year", "Sport", "Event"])
+    ax = sns.heatmap(x.pivot_table(index="Sport", columns="Year", values="Event", aggfunc="count").fillna(0).astype("int"),annot=True)
+    st.pyplot(fig)
+    sport_list = df["Sport"].unique().tolist()
+    sport_list.sort()
+    sport_list.insert(0,"Overall")
+    selected_sport = st.selectbox("Select a Sport",sport_list)
+    st.title("Most successful Athletes")
+    x = important.most_successful(df,selected_sport)
+    st.table(x)
+    st.title("Most popular sports of Olympics")
+    sport_df = df["Sport"].value_counts().reset_index()
+    fig = px.pie(sport_df, values='count', names='Sport')
+    fig.update_layout(autosize=False, width=850,height=700)
+    st.plotly_chart(fig)
+    st.title("Locations of Stadium of countries where olympics held.")
+    data1 = pd.read_csv("lat_long.csv")
+    st.map(data1)
+if user_menu == "Medal tally":
+    st.sidebar.header("Medal tally")
+    years, country = important.country_year_list(df)
+    selected_year = st.sidebar.selectbox("Select year", years)
+    selected_country = st.sidebar.selectbox("Select country", country)
+    medal_tally = important.fetch_medal_tally(df,selected_year,selected_country)
+    if selected_year == "Overall" and selected_country == "Overall":
+        st.title("Overall Medal Tally")
+    if selected_year != "Overall" and selected_country == "Overall":
+        st.title("Medal Tally in " + str(selected_year) + " Olympics")
+    if selected_year == "Overall" and selected_country != "Overall":
+        st.title(selected_country + " overall performance in Olympics" )
+    if selected_year != "Overall" and selected_country != "Overall":
+        st.title(selected_country + "'s performance in " + str(selected_year) + " Olympics")
+    st.table(medal_tally)
+if user_menu == "Country-wise Analysis":
+    st.title("Country-wise Analysis")
+    country = df["region"].dropna().unique().tolist()
+    country.sort()
+    selected_country = st.sidebar.selectbox("Select a country", country)
+    new_region = important.country_wise_medal_tally(df, selected_country)
+    fig = px.line(new_region, x = "Year", y="Medal")
+    st.subheader(selected_country + "'s Medal Tally over the years")
+    st.plotly_chart(fig)
+    pt = important.country_event_heatmap(df,selected_country)
+    st.subheader(selected_country+" excels in the following sports")
+    fig, ax = plt.subplots(figsize=(20,20))
+    ax = sns.heatmap(pt, annot=True)
+    st.pyplot(fig)
+    athlete = important.most_successful_athletes(df, selected_country)
+    st.subheader("Top 10 athletes of "+ selected_country)
+    st.table(athlete)
+if user_menu == "Athlete-wise Analysis":
+    athlete_df = df.drop_duplicates(subset=["Name", "region"])
+    x1 = athlete_df["Age"].dropna()
+    x2 = athlete_df[athlete_df["Medal"] == "Gold"]["Age"].dropna()
+    x3 = athlete_df[athlete_df["Medal"] == "Silver"]["Age"].dropna()
+    x4 = athlete_df[athlete_df["Medal"] == "Bronze"]["Age"].dropna()
+    st.title("Distribution of Age.")
+    fig = ff.create_distplot([x1,x2,x3,x4], ["Age Distribution","Gold Medalist","Silver Medalist","Bronze Medalist"], show_hist=False, show_rug=False)
+    fig.update_layout(autosize=False, width=850,height=530)
+    st.plotly_chart(fig)
+    st.title("Distribution of Age wrt sports(Gold Medalist)")
+    famous_sports =['Basketball','Judo', 'Football','Tug-Of-War','Athletics','Swimming','Badminton','Sailing','Gymnastics','Art Competitions',
+                'Handball','Weightlifting','Wrestling','Water Polo','Hockey','Rowing','Fencing','Shooting','Boxing','Taekwondo',
+                'Cycling', 'Diving', 'Canoeing', 'Tennis', 'Modern Pentathlon', 'Golf', 'Softball', 'Archery', 'Volleyball','Synchronized Swimming',
+                 'Table Tennis', 'Baseball','Rhythmic Gymnastics','Rugby Sevens', 'Beach Volleyball', 'Triathlon', 'Rugby', 'Lacrosse', 'Polo', 'Cricket',
+                'Ice Hockey','Motorboating']
+    x = []
+    name = []
+    for sport in famous_sports:
+        temp_df = athlete_df[athlete_df["Sport"] == sport]
+        x.append(temp_df[temp_df["Medal"]=="Gold"]["Age"].dropna())
+        name.append(sport)
+    fig1 = ff.create_distplot(x,name,show_hist=False, show_rug=False)
+    fig1.update_layout(autosize=False, width=850,height=530)
+    st.plotly_chart(fig1)
+    st.title("Height vs Weight")
+    sport_list = df["Sport"].unique().tolist()
+    sport_list.sort()
+    sport_list.insert(0,"Overall")
+    selected_sport = st.selectbox("Select a Sport",sport_list)
+    new_df = important.weight_v_height(df,selected_sport)
+    fig, ax = plt.subplots(figsize=(10,10))
+    ax = sns.scatterplot(new_df, x ="Weight",y = "Height", hue=new_df["Medal"],style=new_df["Sex"],s=100)
+    st.pyplot(fig)
+    st.title("Men vs Women participation over the years")
+    final_df = important.men_vs_women(df)
+    fig = px.line(final_df, x="Year", y=["Male","Female"])
+    st.plotly_chart(fig)

olympics-data-analysis-web-app ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ A Streamlit web application for the analysis of olympics data.
2	+ Live Web App : https://olympicanalysiss.streamlit.app/

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+numpy==1.26.3
+pandas==2.1.4
+matplotlib==3.8.2
+seaborn==0.13.1
+plotly==5.18.0
+streamlit==1.29.0
+gunicorn==19.7.1
+scipy==1.11.4