# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import gradio as gr
import pickle
# Load the Pokémon dataset
pokemon_data = pd.read_csv('pokemon_dataset.csv')
# Handle Missing Values
categorical_cols = ['type_01', 'type_02', 'ability_01', 'ability_02', 'hidden_ability', 'egg_group_01', 'egg_group_02']
pokemon_data[categorical_cols] = pokemon_data[categorical_cols].fillna('None')
numerical_cols = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']
pokemon_data[numerical_cols] = pokemon_data[numerical_cols].fillna(pokemon_data[numerical_cols].mean())
# Encode the target variable (type_01)
label_encoder = LabelEncoder()
pokemon_data['type_01'] = label_encoder.fit_transform(pokemon_data['type_01'])
# Encode categorical variables for features
categorical_cols = ['type_02', 'ability_01', 'ability_02', 'hidden_ability', 'egg_group_01', 'egg_group_02']
pokemon_data = pd.get_dummies(pokemon_data, columns=categorical_cols)
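# Note: the dummy-encoded column layout created here is what predict() below
# relies on when it aligns a single input row with the training feature set.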
# Drop unnecessary columns
pokemon_data.drop(columns=['dex_number', 'name', 'bio', 'is_legendary'], inplace=True)
# Features and target variable
X = pokemon_data.drop(columns=['type_01']).values
y = pokemon_data['type_01'].values
# Splitting the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)
# Define the neural network model
class PokemonTypeClassifier(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(PokemonTypeClassifier, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Hyperparameters
input_size = X_train.shape[1]
hidden_size = 16
output_size = len(np.unique(y)) # Number of unique types
model = PokemonTypeClassifier(input_size, hidden_size, output_size)
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# Training the model
num_epochs = 1000
for epoch in range(num_epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')
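# DataLoader and TensorDataset are imported above but never used: the loop
# trains on the full training set in a single batch each epoch. A mini-batch
# variant could look roughly like the sketch below (commented out; batch_size=32
# is an assumed value, not something taken from the original code):
#
# train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=32, shuffle=True)
# for epoch in range(num_epochs):
#     model.train()
#     for batch_X, batch_y in train_loader:
#         optimizer.zero_grad()
#         batch_loss = criterion(model(batch_X), batch_y)
#         batch_loss.backward()
#         optimizer.step()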
# Evaluation function
def evaluate_model(model, X_test, y_test):
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        _, predicted = torch.max(outputs, 1)
    y_test_np = y_test.numpy()
    predicted_np = predicted.numpy()
    # Calculate metrics (zero_division=0 silences warnings for types that never get predicted)
    accuracy = accuracy_score(y_test_np, predicted_np)
    precision = precision_score(y_test_np, predicted_np, average='weighted', zero_division=0)
    recall = recall_score(y_test_np, predicted_np, average='weighted', zero_division=0)
    f1 = f1_score(y_test_np, predicted_np, average='weighted', zero_division=0)
    # Print metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print('\nClassification Report:')
    # Pass explicit labels so target_names stays aligned even if some type is
    # absent from the test split or from the predictions
    print(classification_report(y_test_np, predicted_np,
                                labels=np.arange(len(label_encoder.classes_)),
                                target_names=label_encoder.classes_,
                                zero_division=0))
# Evaluate the model
evaluate_model(model, X_test, y_test)
# Save the model state dictionary to a .pkl file
with open('model.pkl', 'wb') as f:
    pickle.dump(model.state_dict(), f)
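# The pickle file above is what predict() reads back, so it is kept as-is.
# The more conventional PyTorch route would be torch.save / torch.load, e.g.
# (sketch only; 'model.pt' is an assumed filename):
#
# torch.save(model.state_dict(), 'model.pt')
# model.load_state_dict(torch.load('model.pt'))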
def predict(hp, attack, defense, special_attack, special_defense, speed,
            type_02, ability_01, ability_02, hidden_ability, egg_group_01, egg_group_02):
    # Prepare the input data in the correct format
    input_data = pd.DataFrame({
        'hp': [hp],
        'attack': [attack],
        'defense': [defense],
        'sp_attack': [special_attack],
        'sp_defense': [special_defense],
        'speed': [speed],
        'type_02': [type_02],
        'ability_01': [ability_01],
        'ability_02': [ability_02],
        'hidden_ability': [hidden_ability],
        'egg_group_01': [egg_group_01],
        'egg_group_02': [egg_group_02]
    })
    # Handle categorical variables
    for col in categorical_cols:
        if col not in input_data.columns:
            input_data[col] = 'None'
    input_data = pd.get_dummies(input_data)
    # Ensure all necessary columns are present, in the same order as during training
    missing_cols = set(pokemon_data.columns) - set(input_data.columns)
    for col in missing_cols:
        input_data[col] = 0
    input_data = input_data[pokemon_data.columns.drop('type_01')]
    # Standardize the features
    input_data = scaler.transform(input_data)
    # Convert to PyTorch tensor
    input_data = torch.tensor(input_data, dtype=torch.float32)
    # Load the model state from the .pkl file
    model = PokemonTypeClassifier(input_size, hidden_size, output_size)
    with open('model.pkl', 'rb') as f:
        model.load_state_dict(pickle.load(f))
    model.eval()
    # Make the prediction
    with torch.no_grad():
        output = model(input_data)
        _, predicted = torch.max(output, 1)
    predicted_class = label_encoder.inverse_transform(predicted.numpy())[0]
    return predicted_class
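# Note: predict() rebuilds the network and reloads its weights from disk on
# every call. A sketch of loading once at startup instead (inference_model is
# an assumed name, not from the original code):
#
# inference_model = PokemonTypeClassifier(input_size, hidden_size, output_size)
# with open('model.pkl', 'rb') as f:
#     inference_model.load_state_dict(pickle.load(f))
# inference_model.eval()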
iface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Number(label="HP (Hit Points)"),
        gr.Number(label="Attack"),
        gr.Number(label="Defense"),
        gr.Number(label="Special Attack"),
        gr.Number(label="Special Defense"),
        gr.Number(label="Speed"),
        gr.Textbox(label="Secondary type (type_02)"),
        gr.Textbox(label="Ability 1 (ability_01)"),
        gr.Textbox(label="Ability 2 (ability_02)"),
        gr.Textbox(label="Hidden Ability (hidden_ability)"),
        gr.Textbox(label="Egg Group 1 (egg_group_01)"),
        gr.Textbox(label="Egg Group 2 (egg_group_02)")
    ],
    outputs=gr.Textbox(label="Predicted primary type (type_01)")
)
iface.launch(share=True)