In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
import os

DATA_DIR = os.path.join("..", "data")

In [2]:
detailed_games_df = pd.read_csv(
    os.path.join(DATA_DIR, "AllSuperDetailedGames.csv"),
)

detailed_games_df.info()

  detailed_games_df = pd.read_csv(


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 377608 entries, 0 to 377607
Columns: 487 entries, Unnamed: 0 to ChalkSeed
dtypes: float64(347), int64(133), object(7)
memory usage: 1.4+ GB


## Split Mens & Womens data

In [3]:
mens_games_df = detailed_games_df[detailed_games_df["League"] == "M"]
wmns_games_df = detailed_games_df[detailed_games_df["League"] == "W"]

## Define Features, Targets, and register data on device

In [4]:
# define the features and target for our models
feature_cols = [
    "ScoreDiff mean reg",
    "FGMDiff mean reg",
    "FGM3Diff mean reg",
    "TODiff mean reg",

    "OppScore mean reg",
    "OppFGM mean reg",
    "OppFGM3 mean reg",
    "OppTO mean reg",
]

target_cols = ["Win"]

In [5]:
# split into training and testing datasets
MX_train, MX_test, My_train, My_test = train_test_split(
    mens_games_df[feature_cols],
    mens_games_df[target_cols],
    test_size=0.2,
    random_state=1,
)

# same for womens
WX_train, WX_test, Wy_train, Wy_test = train_test_split(
    wmns_games_df[feature_cols],
    wmns_games_df[target_cols],
    test_size=0.2,
    random_state=1,
)

In [None]:
# convert data to tensor objects and register to device
# DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

def get_device() -> str:
    if torch.cuda.is_available():
        return "cuda"
    if torch.backends.mps.is_available():
        return "mps"
    return "cpu"

DEVICE = get_device()
print(DEVICE)

In [6]:

MX_train_T = torch.tensor(
    MX_train.astype(float).values,
    dtype=torch.float32,
).to(DEVICE)

MX_test_T = torch.tensor(
    MX_test.astype(float).values,
    dtype=torch.float32,
).to(DEVICE)

My_train_T = torch.tensor(
    My_train.astype(float).values,
    dtype=torch.float32,
).to(DEVICE)

My_test_T = torch.tensor(
    My_test.astype(float).values,
    dtype=torch.float32,
).to(DEVICE)

# # same for womens data
WX_train_T = torch.tensor(
    WX_train.values,
    dtype=torch.float32,
).to(DEVICE)

WX_test_T = torch.tensor(
    WX_test.values,
    dtype=torch.float32,
).to(DEVICE)

Wy_train_T = torch.tensor(
    Wy_train.values,
    dtype=torch.float32,
).to(DEVICE)

Wy_test_T = torch.tensor(
    Wy_test.values,
    dtype=torch.float32,
).to(DEVICE)

# Generic Neural Network Framework

I am using the same neural network structure for both the mens and womens data

In [8]:
num_features = len(feature_cols)

class NiglNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.activation_func = nn.Sigmoid()
        self.layer1 = nn.Linear(num_features, 64) 
        self.layer2 = nn.Linear(64, 32)
        self.layer3 = nn.Linear(32, 16)
        self.layer4 = nn.Linear(16, 8)
        self.layer5 = nn.Linear(8, 4)
        self.layer6 = nn.Linear(4, 1)

    def forward(self, x: torch.Tensor):
        x = self.layer1(x)
        x = self.activation_func(x)
        x = self.layer2(x)
        x = self.activation_func(x)
        x = self.layer3(x)
        x = self.activation_func(x)
        x = self.layer4(x)
        x = self.activation_func(x)
        x = self.layer5(x)
        x = self.activation_func(x)
        x = self.layer6(x)
        x = self.activation_func(x)
        return x


In [10]:
# mens training loop
torch.manual_seed(2)

epochs = 10_000
nigl10k = NiglNN().to(DEVICE)
loss_fn = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(
    lr=0.001,
    params=nigl10k.parameters(),
)

for epoch in range(1, epochs + 1):
    optimizer.zero_grad()
    pred = nigl10k(MX_train_T)
    loss = loss_fn(pred, My_train_T) 
    loss.backward()
    optimizer.step()

    if epoch % 1_000 == 0:
        print(f"[{epoch} / {epochs}] Binary Cross Entropy: {loss.item()}")


[1000 / 10000] Binary Cross Entropy: 0.6770758628845215
[2000 / 10000] Binary Cross Entropy: 0.6671037077903748
[3000 / 10000] Binary Cross Entropy: 0.6648934483528137
[4000 / 10000] Binary Cross Entropy: 0.6640341281890869
[5000 / 10000] Binary Cross Entropy: 0.663619875907898
[6000 / 10000] Binary Cross Entropy: 0.6633755564689636
[7000 / 10000] Binary Cross Entropy: 0.6631807088851929
[8000 / 10000] Binary Cross Entropy: 0.663043200969696
[9000 / 10000] Binary Cross Entropy: 0.6629269123077393
[10000 / 10000] Binary Cross Entropy: 0.6629060506820679


In [11]:
nigl10k.eval()

with torch.no_grad():
    pred = nigl10k(MX_test_T)
    loss = loss_fn(pred, My_test_T)
    print(f"Binary Cross Entropy: {loss.item()}")

Binary Cross Entropy: 0.6655928492546082


In [12]:
# save model
MODEL_DIR = os.path.join("..", "models")

torch.save(
    nigl10k,
    os.path.join(MODEL_DIR, "nn10k.pth"),
)