olympic_eda / important.py
musk12's picture
olympic_eda
fe14f7a verified
raw
history blame contribute delete
No virus
4.24 kB
def medal_tally(df):
medal_tally = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
medal_tally = medal_tally.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
medal_tally["Total"] = medal_tally["Gold"]+medal_tally["Silver"]+medal_tally["Bronze"]
return medal_tally
def country_year_list(df):
years = df["Year"].unique().tolist()
years.sort()
years.insert(0,"Overall")
country = df["region"].dropna().unique().tolist()
country.sort()
country.insert(0,"Overall")
return years, country
def fetch_medal_tally(df,year,country):
medal_df = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
flag = 0
if year == "Overall" and country == "Overall":
temp_df = medal_df
if year == "Overall" and country != "Overall":
flag = 1
temp_df = medal_df[medal_df["region"] == country]
if year != "Overall" and country == "Overall":
temp_df = medal_df[medal_df["Year"] == int(year)]
if year != "Overall" and country != "Overall":
temp_df = medal_df[(medal_df["Year"] == year) & (medal_df["region"] == country)]
if flag == 1:
x = temp_df.groupby("Year").sum()[["Gold", "Silver", "Bronze"]].sort_values("Year").reset_index()
else:
x = temp_df.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
x["Total"] = x["Gold"]+x["Silver"]+x["Bronze"]
return x
def data_over_time(df,col):
year_count = df.drop_duplicates(["Year", col])["Year"].value_counts().reset_index().sort_values("Year")
year_count.rename(columns={"count":col}, inplace = True)
return year_count
def most_successful(df, sport):
temp_df = df.dropna(subset=["Medal"])
if sport != "Overall":
temp_df = temp_df[temp_df["Sport"] == sport]
x= temp_df["Name"].value_counts().reset_index().head(15).merge(df,left_on="Name",
right_on="Name",how="left") [["Name","count","Sport","region"]].drop_duplicates("Name")
x.rename(columns={"count":"Medals"}, inplace=True)
return x
def country_wise_medal_tally(df, region):
temp_df = df.dropna(subset=["Medal"])
temp_df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"], inplace=True)
new_df=temp_df[temp_df["region"]==region]
new_df.groupby("Year").count()["Medal"].reset_index()
final_df = new_df.groupby("Year").count()["Medal"].reset_index()
return final_df
def country_event_heatmap(df,country):
temp_df = df.dropna(subset=["Medal"])
new_df=temp_df[temp_df["region"]==country]
pt = new_df.pivot_table(index="Sport", columns="Year", values="Medal", aggfunc="count").fillna(0).astype("int")
return pt
def most_successful_athletes(df, country):
temp_df = df.dropna(subset=["Medal"])
temp_df = temp_df[temp_df["region"] == country]
x= temp_df["Name"].value_counts().reset_index().head(10).merge(df,left_on="Name", right_on="Name",
how="left")[["Name","count","Sport"]].drop_duplicates("Name")
x.rename(columns={"count":"Medals"}, inplace=True)
return x
def weight_v_height(df, sport):
athlete_df = df.drop_duplicates(subset=["Name", "region"])
athlete_df["Medal"].fillna("No Medal", inplace=True)
if sport != "Overall":
temp_df = athlete_df[athlete_df["Sport"]==sport]
return temp_df
else:
return athlete_df
def men_vs_women(df):
athlete_df = df.drop_duplicates(subset=["Name", "region"])
men = athlete_df[athlete_df["Sex"]=="M"].groupby("Year").count()["Name"].reset_index()
women = athlete_df[athlete_df["Sex"]=="F"].groupby("Year").count()["Name"].reset_index()
final = men.merge(women, on="Year", how="left")
final.rename(columns={"Name_x":"Male", "Name_y":"Female"}, inplace=True)
final.fillna(0, inplace=True)
return final