File size: 4,241 Bytes
fe14f7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
def medal_tally(df):
    medal_tally = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
    medal_tally = medal_tally.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
    medal_tally["Total"] = medal_tally["Gold"]+medal_tally["Silver"]+medal_tally["Bronze"]

    return medal_tally

def country_year_list(df):
    years = df["Year"].unique().tolist()
    years.sort()
    years.insert(0,"Overall")

    country = df["region"].dropna().unique().tolist()
    country.sort()
    country.insert(0,"Overall")

    return years, country

def fetch_medal_tally(df,year,country):
    medal_df = df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"])
    flag = 0
    if year == "Overall" and country == "Overall":
        temp_df = medal_df
    if year == "Overall" and country != "Overall":
        flag = 1
        temp_df = medal_df[medal_df["region"] == country]
    if year != "Overall" and country == "Overall":
        temp_df = medal_df[medal_df["Year"] == int(year)]
    if year != "Overall" and country != "Overall":
        temp_df = medal_df[(medal_df["Year"] == year) & (medal_df["region"] == country)]

    if flag == 1:
        x = temp_df.groupby("Year").sum()[["Gold", "Silver", "Bronze"]].sort_values("Year").reset_index()
    else:
        x = temp_df.groupby("region").sum()[["Gold", "Silver", "Bronze"]].sort_values("Gold", ascending = False).reset_index()
    x["Total"] = x["Gold"]+x["Silver"]+x["Bronze"]

    return x

def data_over_time(df,col):
    year_count = df.drop_duplicates(["Year", col])["Year"].value_counts().reset_index().sort_values("Year")
    year_count.rename(columns={"count":col}, inplace = True)
    
    return year_count


def most_successful(df, sport):
    temp_df = df.dropna(subset=["Medal"])

    if sport != "Overall":
        temp_df = temp_df[temp_df["Sport"] == sport]
    x= temp_df["Name"].value_counts().reset_index().head(15).merge(df,left_on="Name", 
                                                                   right_on="Name",how="left")       [["Name","count","Sport","region"]].drop_duplicates("Name")
    x.rename(columns={"count":"Medals"}, inplace=True)
    return x

def country_wise_medal_tally(df, region):
    
    temp_df = df.dropna(subset=["Medal"])
    temp_df.drop_duplicates(subset=["Team", "NOC", "Games", "Year", "City", "Sport", "Event", "Medal"], inplace=True)
    new_df=temp_df[temp_df["region"]==region]
    new_df.groupby("Year").count()["Medal"].reset_index()
    final_df = new_df.groupby("Year").count()["Medal"].reset_index()
    return final_df

def country_event_heatmap(df,country):
    
    temp_df = df.dropna(subset=["Medal"])
    new_df=temp_df[temp_df["region"]==country]
    pt = new_df.pivot_table(index="Sport", columns="Year", values="Medal", aggfunc="count").fillna(0).astype("int")

    return pt

def most_successful_athletes(df, country):
    temp_df = df.dropna(subset=["Medal"])

    temp_df = temp_df[temp_df["region"] == country]
    x= temp_df["Name"].value_counts().reset_index().head(10).merge(df,left_on="Name", right_on="Name",
                                                                   how="left")[["Name","count","Sport"]].drop_duplicates("Name")
    x.rename(columns={"count":"Medals"}, inplace=True)
    return x

def weight_v_height(df, sport):
    athlete_df = df.drop_duplicates(subset=["Name", "region"])
    athlete_df["Medal"].fillna("No Medal", inplace=True)
    if sport != "Overall":
        temp_df = athlete_df[athlete_df["Sport"]==sport]
        return temp_df
    else:
        return athlete_df

def men_vs_women(df):
    athlete_df = df.drop_duplicates(subset=["Name", "region"])
    men = athlete_df[athlete_df["Sex"]=="M"].groupby("Year").count()["Name"].reset_index()
    women = athlete_df[athlete_df["Sex"]=="F"].groupby("Year").count()["Name"].reset_index()
    final = men.merge(women, on="Year", how="left")
    final.rename(columns={"Name_x":"Male", "Name_y":"Female"}, inplace=True)
    final.fillna(0, inplace=True)
    return final