File size: 580 Bytes
fe14f7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import pandas as pd

# Module-level data load: both CSV files are read once, at import time, from
# paths relative to the current working directory. The athlete events file is
# read first, then the NOC lookup file.
df, region_df = (
    pd.read_csv(csv_path)
    for csv_path in ("athlete_events.csv", "noc_regions.csv")
)

def preprocess1(df, region_df):
    """Prepare the athlete-events table for medal analysis.

    Keeps only the rows whose ``Season`` is ``"Summer"``, attaches the
    columns of ``region_df`` with a left join on ``NOC``, removes fully
    duplicated rows and appends one 0/1 indicator column per medal value.

    Args:
        df: Event rows. Must contain the ``Season``, ``NOC`` and ``Medal``
            columns.
        region_df: Lookup table containing a ``NOC`` column to join on.

    Returns:
        A new DataFrame (the inputs are not modified) holding the merged
        columns followed by the medal indicator columns. ``Bronze``,
        ``Gold`` and ``Silver`` are always present, even when a medal type
        does not occur in the filtered data.

    Raises:
        KeyError: If ``Season``, ``NOC`` or ``Medal`` is missing.
    """
    # filtering for summer olympics
    summer = df[df["Season"] == "Summer"]

    # merging with noc_regions data; a left join keeps events whose NOC has
    # no entry in region_df
    merged = summer.merge(region_df, on="NOC", how="left")

    # dropping duplicates (reassigned rather than mutated in place)
    merged = merged.drop_duplicates()

    # one hot encoding medal column; rows without a medal get 0 everywhere
    medal_flags = pd.get_dummies(merged["Medal"], dtype=int)

    # get_dummies only creates columns for values that actually occur, so a
    # subset without e.g. any silver medal would silently lack "Silver".
    # Add the missing medal columns as zeros to keep the output schema stable.
    medal_columns = medal_flags.columns.union(["Bronze", "Gold", "Silver"])
    medal_flags = medal_flags.reindex(columns=medal_columns, fill_value=0)
    medal_flags = medal_flags.astype(int)

    # and concat dummy with original data
    return pd.concat([merged, medal_flags], axis=1)