Spaces:
Sleeping
Sleeping
def medal_tally(df): | |
medal_tally = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]) | |
medal_tally = medal_tally.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index() | |
medal_tally["Total"] = medal_tally["Gold"]+medal_tally["Silver"]+medal_tally["Bronze"] | |
return medal_tally | |
def country_year_list(df): | |
years = df["Year"].unique().tolist() | |
years.sort() | |
years.insert(0,"Overall") | |
country = df["region"].dropna().unique().tolist() | |
country.sort() | |
country.insert(0,"Overall") | |
return years, country | |
def fetch_medal_tally(df,year,country): | |
medal_df = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"]) | |
flag = 0 | |
if year == "Overall" and country == "Overall": | |
temp_df = medal_df | |
if year == "Overall" and country != "Overall": | |
flag = 1 | |
temp_df = medal_df[medal_df["region"] == country] | |
if year != "Overall" and country == "Overall": | |
temp_df = medal_df[medal_df["Year"] == int(year)] | |
if year != "Overall" and country != "Overall": | |
temp_df = medal_df[(medal_df["Year"] == year) & (medal_df["region"] == country)] | |
if flag == 1: | |
x = temp_df.groupby("Year").sum()[["Gold", "Silver", "Bronze"]].sort_values("Year").reset_index() | |
else: | |
x = temp_df.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index() | |
x["Total"] = x["Gold"]+x["Silver"]+x["Bronze"] | |
return x | |
def data_over_time(df,col): | |
year_count = df.drop_duplicates(["Year", col])["Year"].value_counts().reset_index().sort_values("Year") | |
year_count.rename(columns={"count":col}, inplace = True) | |
return year_count | |
def most_successful(df, sport): | |
temp_df = df.dropna(subset=["Medal"]) | |
if sport != "Overall": | |
temp_df = temp_df[temp_df["Sport"] == sport] | |
x= temp_df["Name"].value_counts().reset_index().head(15).merge(df,left_on="Name", | |
right_on="Name",how="left") [["Name","count","Sport","region"]].drop_duplicates("Name") | |
x.rename(columns={"count":"Medals"}, inplace=True) | |
return x | |
def country_wise_medal_tally(df, region): | |
temp_df = df.dropna(subset=["Medal"]) | |
temp_df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"], inplace=True) | |
new_df=temp_df[temp_df["region"]==region] | |
new_df.groupby("Year").count()["Medal"].reset_index() | |
final_df = new_df.groupby("Year").count()["Medal"].reset_index() | |
return final_df | |
def country_event_heatmap(df,country): | |
temp_df = df.dropna(subset=["Medal"]) | |
new_df=temp_df[temp_df["region"]==country] | |
pt = new_df.pivot_table(index="Sport", columns="Year", values="Medal", aggfunc="count").fillna(0).astype("int") | |
return pt | |
def most_successful_athletes(df, country): | |
temp_df = df.dropna(subset=["Medal"]) | |
temp_df = temp_df[temp_df["region"] == country] | |
x= temp_df["Name"].value_counts().reset_index().head(10).merge(df,left_on="Name", right_on="Name", | |
how="left")[["Name","count","Sport"]].drop_duplicates("Name") | |
x.rename(columns={"count":"Medals"}, inplace=True) | |
return x | |
def weight_v_height(df, sport): | |
athlete_df = df.drop_duplicates(subset=["Name", "region"]) | |
athlete_df["Medal"].fillna("No Medal", inplace=True) | |
if sport != "Overall": | |
temp_df = athlete_df[athlete_df["Sport"]==sport] | |
return temp_df | |
else: | |
return athlete_df | |
def men_vs_women(df): | |
athlete_df = df.drop_duplicates(subset=["Name", "region"]) | |
men = athlete_df[athlete_df["Sex"]=="M"].groupby("Year").count()["Name"].reset_index() | |
women = athlete_df[athlete_df["Sex"]=="F"].groupby("Year").count()["Name"].reset_index() | |
final = men.merge(women, on="Year", how="left") | |
final.rename(columns={"Name_x":"Male", "Name_y":"Female"}, inplace=True) | |
final.fillna(0, inplace=True) | |
return final | |