Spaces:
Paused
Paused
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split | |
| from xgboost import XGBClassifier | |
| from sklearn.metrics import accuracy_score, classification_report, confusion_matrix | |
| from sklearn.preprocessing import StandardScaler | |
| from imblearn.over_sampling import SMOTE | |
| import gradio as gr | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
| import io | |
| import zipfile | |
| import joblib | |
| from PIL import Image | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
# Function to load and preprocess data
def load_and_preprocess_data(file):
    """Read the uploaded CSV and add numeric suit/rank columns for each hand.

    For every hand (Dragon, Tiger, Lion) the text columns ``"<Hand> Suit"``
    and ``"<Hand> Rank"`` are encoded into ``"<Hand> Suit Num"`` and
    ``"<Hand> Rank Num"``. Labels are compared case-insensitively after
    stripping surrounding whitespace.

    Args:
        file: A filesystem path (``str`` / ``os.PathLike``) or an object that
            exposes the path through a ``.name`` attribute.

    Returns:
        ``(data, None)`` on success, where ``data`` is the loaded DataFrame
        with the six numeric columns added, or ``(None, message)`` when the
        file cannot be read or fails validation. Nothing is raised.
    """
    import os

    suit_order = {'spades': 0, 'hearts': 1, 'clubs': 2, 'diamonds': 3}
    rank_order = {'ace': 0, '2': 1, '3': 2, '4': 3, '5': 4, '6': 5, '7': 6, '8': 7, '9': 8, '10': 9,
                  'jack': 10, 'queen': 11, 'king': 12}
    # (source column, encoding) pairs, in the order the numeric columns are added.
    encodings = [
        (f'{hand} {attr}', order)
        for hand in ('Dragon', 'Tiger', 'Lion')
        for attr, order in (('Suit', suit_order), ('Rank', rank_order))
    ]

    try:
        if file is None:
            return None, "Error loading data: no file was provided"
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        data = pd.read_csv(path)

        missing = [column for column, _ in encodings if column not in data.columns]
        if missing:
            return None, f"Error loading data: missing required column(s): {', '.join(missing)}"

        for column, order in encodings:
            # read_csv infers an integer dtype for a rank column that holds
            # only numeric ranks; normalising to text keeps the string-keyed
            # lookup working in that case too.
            labels = data[column].astype(str).str.strip().str.lower()
            encoded = labels.map(order)
            unknown = sorted(set(labels[encoded.isna()]))
            if unknown:
                # Fail here rather than letting NaN codes reach training.
                return None, (
                    f"Error loading data: unrecognised value(s) in '{column}': "
                    f"{', '.join(unknown)}"
                )
            data[f'{column} Num'] = encoded.astype('int64')
        return data, None
    except Exception as e:
        return None, f"Error loading data: {str(e)}"
# Feature engineering
def create_features(data, n_games=3):
    """Build one feature row per round from the ``n_games`` rounds before it.

    Each row holds, in this order:
      * the suit and rank codes of Dragon, Tiger and Lion for lags
        ``t-1 .. t-n_games``;
      * the suit*rank product per hand for the same lags;
      * mean and standard deviation of all suit codes, then of all rank
        codes, over the preceding ``n_games`` rounds.

    Args:
        data: DataFrame containing the ``"<Hand> Suit Num"`` and
            ``"<Hand> Rank Num"`` columns.
        n_games: Number of preceding rounds used as history.

    Returns:
        DataFrame with ``len(data) - n_games`` rows (none when ``data`` is
        not longer than ``n_games``); row ``k`` describes the round at
        position ``k + n_games`` of ``data``.
    """
    hands = ['Dragon', 'Tiger', 'Lion']
    lags = range(1, n_games + 1)
    suit_cols = [f'{hand} Suit Num' for hand in hands]
    rank_cols = [f'{hand} Rank Num' for hand in hands]

    def history_row(pos):
        # Raw suit/rank codes, most recent round first.
        row = [
            data[f'{hand} {attr} Num'].iloc[pos - lag]
            for lag in lags
            for hand in hands
            for attr in ('Suit', 'Rank')
        ]
        # Suit x rank interaction per hand and lag.
        row += [
            data[f'{hand} Suit Num'].iloc[pos - lag] * data[f'{hand} Rank Num'].iloc[pos - lag]
            for lag in lags
            for hand in hands
        ]
        # Summary statistics pooled over all three hands in the window.
        window = data.iloc[pos - n_games:pos]
        suits = window[suit_cols].values.flatten()
        ranks = window[rank_cols].values.flatten()
        row += [np.mean(suits), np.std(suits), np.mean(ranks), np.std(ranks)]
        return row

    rows = [history_row(pos) for pos in range(n_games, len(data))]

    names = []
    for lag in lags:
        for hand in hands:
            names.append(f'{hand}_Suit_t-{lag}')
            names.append(f'{hand}_Rank_t-{lag}')
    for lag in lags:
        for hand in hands:
            names.append(f'{hand}_suit_rank_inter_t-{lag}')
    names += ['suit_mean', 'suit_std', 'rank_mean', 'rank_std']

    return pd.DataFrame(rows, columns=names)
# Function to plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, title):
    """Render the confusion matrix of ``y_pred`` against ``y_true`` as an image.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        title: Title drawn above the heatmap.

    Returns:
        A PIL image decoded from the PNG rendering of the heatmap.
    """
    cm = confusion_matrix(y_true, y_pred)
    # Draw on an explicit figure/axes pair instead of pyplot's implicit
    # "current figure", so the figure that gets closed is always this one.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(title)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('Actual')
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    # Image.open is lazy; decode now so the returned image is fully loaded.
    img.load()
    return img
# Function to plot accuracy bar chart
def plot_accuracy_chart(accuracies):
    """Render a bar chart comparing per-model accuracy as an image.

    Args:
        accuracies: Mapping of model name to accuracy; the y axis is fixed
            to the range 0..1.

    Returns:
        A PIL image decoded from the PNG rendering of the chart.
    """
    # Materialise the dict views so matplotlib receives plain sequences.
    names = list(accuracies.keys())
    scores = list(accuracies.values())

    # Explicit figure/axes instead of pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(8, 5))
    try:
        ax.bar(names, scores, color='skyblue')
        ax.set_title('Model Accuracy Comparison')
        ax.set_ylabel('Accuracy')
        ax.tick_params(axis='x', labelrotation=45)
        ax.set_ylim(0, 1)
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)
    # Image.open is lazy; decode now so the returned image is fully loaded.
    img.load()
    return img
# Function to create a ZIP file of models
def create_model_zip(models):
    """Bundle serialized models into an in-memory, deflate-compressed ZIP.

    Args:
        models: Mapping of model name to model object. Each model is dumped
            with joblib and stored as ``"<name>_model.pkl"``.

    Returns:
        ``io.BytesIO`` holding the archive, positioned at offset 0.
    """
    def dump_to_bytes(estimator):
        # joblib writes into a scratch buffer; only its bytes are kept.
        scratch = io.BytesIO()
        joblib.dump(estimator, scratch)
        scratch.seek(0)
        return scratch.getvalue()

    archive = io.BytesIO()
    with zipfile.ZipFile(archive, mode='w', compression=zipfile.ZIP_DEFLATED) as bundle:
        for label, estimator in models.items():
            bundle.writestr(f"{label}_model.pkl", dump_to_bytes(estimator))
    # Rewind so the caller can read the archive from the start.
    archive.seek(0)
    return archive
# Training function with progress tracking and model saving
def _resample_training_set(X_train, y_train):
    """Oversample minority classes with SMOTE when the training split allows it."""
    class_counts = pd.Series(y_train).value_counts()
    smallest = int(class_counts.min()) if len(class_counts) else 0
    if len(class_counts) < 2 or smallest < 2:
        # SMOTE needs at least two classes and two samples in the rarest
        # class; fall back to the unmodified training split.
        return X_train, y_train
    # SMOTE's default k_neighbors=5 fails when the rarest class has 5 or
    # fewer samples, so shrink the neighbourhood to what is available.
    smote = SMOTE(random_state=42, k_neighbors=min(5, smallest - 1))
    return smote.fit_resample(X_train, y_train)


def _write_zip_to_temp_file(zip_buffer):
    """Persist an in-memory ZIP to a temporary file and return its path."""
    import tempfile

    with tempfile.NamedTemporaryFile(
        prefix='trained_models_', suffix='.zip', delete=False
    ) as handle:
        handle.write(zip_buffer.getvalue())
    return handle.name


def train_model(file, n_estimators, learning_rate, max_depth, subsample, progress=gr.Progress()):
    """Train one XGBoost classifier per card attribute and report the results.

    Six targets are modelled (suit and rank for Dragon, Tiger and Lion), each
    from the features of the three preceding rounds.

    Args:
        file: Uploaded CSV, passed through to ``load_and_preprocess_data``.
        n_estimators: Number of boosting rounds (converted with ``int``).
        learning_rate: Boosting learning rate (converted with ``float``).
        max_depth: Maximum tree depth (converted with ``int``).
        subsample: Row subsampling ratio (converted with ``float``).
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        A 4-tuple ``(report_text, accuracy_image, confusion_images,
        zip_path)``. ``zip_path`` is the path of a temporary ZIP file with the
        trained models and the fitted feature scaler. On failure the tuple is
        ``(error_message, None, None, None)``; nothing is raised.
    """
    progress(0, desc="Starting...")
    results = []
    try:
        # Load and preprocess data
        progress(0.1, desc="Loading and preprocessing data...")
        data, error = load_and_preprocess_data(file)
        if error:
            return error, None, None, None

        # Create features
        progress(0.2, desc="Engineering features...")
        n_games = 3
        features = create_features(data, n_games)
        if features.empty:
            return (
                f"Error during training: need more than {n_games} rows of data, got {len(data)}",
                None, None, None,
            )
        # The first n_games rounds have no full history, so targets start
        # at position n_games to line up with the feature rows.
        targets = {
            f'{hand.lower()}_{attr.lower()}': data[f'{hand} {attr} Num'].iloc[n_games:]
            for hand in ('Dragon', 'Tiger', 'Lion')
            for attr in ('Suit', 'Rank')
        }

        # Scale features
        # NOTE: the scaler is fit on all rows before the train/test split.
        progress(0.3, desc="Scaling features...")
        scaler = StandardScaler()
        features_scaled = scaler.fit_transform(features)
        features_scaled = pd.DataFrame(features_scaled, columns=features.columns)

        accuracies = {}
        confusion_matrices = []
        trained_models = {}

        # Train models
        for i, (target_name, target) in enumerate(targets.items()):
            progress(0.4 + (i / len(targets)) * 0.4, desc=f"Training {target_name} model...")

            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                features_scaled, target, test_size=0.2, random_state=42
            )

            # Apply SMOTE (skipped or narrowed when classes are too small)
            X_train_res, y_train_res = _resample_training_set(X_train, y_train)

            # Train model. early_stopping_rounds is an estimator parameter;
            # XGBoost 2.x no longer accepts it as a fit() argument.
            model = XGBClassifier(
                random_state=42,
                eval_metric='mlogloss',
                n_estimators=int(n_estimators),
                learning_rate=float(learning_rate),
                max_depth=int(max_depth),
                subsample=float(subsample),
                early_stopping_rounds=10,
            )
            # NOTE: the held-out split drives early stopping and is also the
            # split the metrics below are computed on.
            model.fit(
                X_train_res,
                y_train_res,
                eval_set=[(X_test, y_test)],
                verbose=False,
            )

            # Save model
            trained_models[target_name] = model

            # Evaluate
            y_pred = model.predict(X_test)
            accuracy = accuracy_score(y_test, y_pred)
            report = classification_report(y_test, y_pred, zero_division=0)
            accuracies[target_name] = accuracy
            results.append(f"**{target_name} Results**\n")
            results.append(f"Accuracy: {accuracy:.2f}\n")
            results.append(f"Classification Report:\n{report}\n")

            # Generate confusion matrix plot
            cm_plot = plot_confusion_matrix(y_test, y_pred, f"Confusion Matrix - {target_name}")
            confusion_matrices.append(cm_plot)

        progress(0.9, desc="Generating visualizations and model archive...")

        # Generate accuracy bar chart
        accuracy_plot = plot_accuracy_chart(accuracies)

        # Create ZIP file of models. The models were fit on scaled features,
        # so the fitted scaler is shipped alongside them.
        model_zip = create_model_zip({**trained_models, 'feature_scaler': scaler})
        # The file output component is given a path on disk instead of the
        # in-memory buffer.
        zip_path = _write_zip_to_temp_file(model_zip)

        progress(1.0, desc="Completed!")
        return "\n".join(results), accuracy_plot, confusion_matrices, zip_path
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error during training: {str(e)}", None, None, None
# Gradio interface
with gr.Blocks() as demo:
    # Page heading and usage hint.
    gr.Markdown("# Card Game Prediction Model Training")
    gr.Markdown("Upload the training dataset and configure hyperparameters to train the model. Track progress, view results, and download trained models.")

    # Inputs: dataset upload followed by the XGBoost hyperparameters.
    file_input = gr.File(label="Upload TRAINING_CARD_DATA.csv")
    n_estimators = gr.Slider(
        minimum=50, maximum=300, value=100, step=10, label="Number of Estimators"
    )
    learning_rate = gr.Slider(
        minimum=0.01, maximum=0.3, value=0.1, step=0.01, label="Learning Rate"
    )
    max_depth = gr.Slider(
        minimum=3, maximum=10, value=5, step=1, label="Max Depth"
    )
    subsample = gr.Slider(
        minimum=0.5, maximum=1.0, value=0.8, step=0.1, label="Subsample"
    )
    train_button = gr.Button("Train Model")

    # Outputs, in the order train_model returns them.
    output_text = gr.Textbox(label="Training Results")
    accuracy_plot = gr.Image(label="Accuracy Comparison")
    confusion_plots = gr.Gallery(label="Confusion Matrices")
    model_download = gr.File(label="Download Trained Models (ZIP)")

    # Run training when the button is clicked.
    train_button.click(
        fn=train_model,
        inputs=[file_input, n_estimators, learning_rate, max_depth, subsample],
        outputs=[output_text, accuracy_plot, confusion_plots, model_download],
    )

demo.launch()