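# NOTE: the header lines here ("Spaces:" / "Sleeping" / "Sleeping") were
# web-page status residue picked up during extraction, not part of the program.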
import pandas as pd

# Load the two input tables at import time. Both paths are relative to the
# current working directory.
df = pd.read_csv("athlete_events.csv")
region_df = pd.read_csv("noc_regions.csv")
def preprocess1(df, region_df):
    """Return the Summer rows of *df* joined to *region_df*, with medal flags.

    Steps, in order:

    1. Keep only rows whose ``"Season"`` column equals ``"Summer"``.
    2. Left-merge with *region_df* on the ``"NOC"`` column.
    3. Drop fully duplicated rows.
    4. One-hot encode the ``"Medal"`` column into integer 0/1 columns and
       append them to the frame.

    Args:
        df: Frame with at least ``"Season"``, ``"NOC"`` and ``"Medal"`` columns.
        region_df: Frame with a ``"NOC"`` column to join on.

    Returns:
        A new DataFrame; neither input is modified. ``"Gold"``, ``"Silver"``
        and ``"Bronze"`` columns are always present, filled with 0 when that
        medal value does not occur in the filtered data.
    """
    # Filter for Summer Olympics only.
    summer = df[df["Season"] == "Summer"]

    # Left merge keeps rows whose NOC has no match in region_df.
    merged = summer.merge(region_df, on="NOC", how="left")

    # Assignment form instead of inplace=True: no hidden mutation.
    merged = merged.drop_duplicates()

    # One-hot encode the medal column; missing medals yield all-zero rows.
    medals = pd.get_dummies(merged["Medal"], dtype=int)

    # get_dummies only emits columns for values that actually occur, so add
    # any absent medal type as zeros to keep the output schema stable.
    for medal in ("Gold", "Silver", "Bronze"):
        if medal not in medals.columns:
            medals[medal] = 0

    return pd.concat([merged, medals], axis=1)