olympic_eda / functions.py
musk12's picture
olympic_eda
fe14f7a verified
raw
history blame contribute delete
580 Bytes
import pandas as pd
# Both tables are read once, when this module is imported. The paths are
# relative, so they resolve against the process's current working directory.
# NOTE(review): presumably these are the frames later passed to preprocess1 - confirm against callers.
df = pd.read_csv(filepath_or_buffer="athlete_events.csv")
region_df = pd.read_csv(filepath_or_buffer="noc_regions.csv")
def preprocess1(df, region_df):
    """Prepare Summer Olympics rows with region info and medal indicator columns.

    Keeps only the rows whose ``Season`` equals ``"Summer"``, left-joins
    ``region_df`` on ``NOC``, drops fully duplicated rows, and appends one
    0/1 integer column per value found in ``Medal``. ``Gold``, ``Silver``
    and ``Bronze`` columns are always present (all zeros when no remaining
    row carries that medal), so callers can select them even on small or
    medal-free inputs.

    Args:
        df: DataFrame with at least ``Season``, ``NOC`` and ``Medal`` columns.
        region_df: DataFrame with a ``NOC`` column; its other columns are
            attached to every matching row of ``df``.

    Returns:
        A new DataFrame; neither input is modified.
    """
    # filtering for summer olympics
    summer = df[df["Season"] == "Summer"]
    # merging with noc_regions data; a left join keeps rows whose NOC has no match
    merged = summer.merge(region_df, on="NOC", how="left")
    # dropping duplicates; surviving rows keep their labels, so the dummy
    # frame built below lines up with them when concatenated
    merged = merged.drop_duplicates()
    # one hot encoding medal column; rows with a missing Medal get 0 everywhere
    dummy = pd.get_dummies(merged["Medal"], dtype=int)
    # get_dummies only emits columns for values that actually occur, so add
    # zero-filled columns for any standard medal that is absent
    for medal in ("Gold", "Silver", "Bronze"):
        if medal not in dummy.columns:
            dummy[medal] = 0
    # and concat dummy with original data
    return pd.concat([merged, dummy], axis=1)