"""Streamlit page: upload a CSV file and run every saved model against it.

Workflow: preprocess the uploaded file via `common`, scale and (optionally)
PCA-transform the features, then loop over all serialized models in the
model directory, appending each model's predictions as a column of the
input DataFrame. Finally report the majority class predicted by the
RandomForestClassifier model.
"""

import streamlit as st
import base64
from io import BytesIO
import pandas as pd
import numpy as np
import pathlib
import os
from joblib import load
import common
from common import log_verbose

# Single-CSV upload widget; returns None until the user picks a file.
file = st.file_uploader(
    label="Upload file for prediction",
    accept_multiple_files=False,
    type=["csv"],
    help="Select file to perform prediction on",
)

# if selection made
if file is not None:
    # prepare for possible multiple files
    input_df = common.preprocess_input_file(file)
    # Drop the identifier column before scaling/prediction; it is not a feature.
    shapes_df = input_df.drop(columns=['id'])
    print('Found file with following dimensions: ' + str(shapes_df.shape))

    # normalize input
    X_predict = common.apply_scaler(shapes_df)
    # apply PCA (presumably a no-op when the pipeline was trained without PCA
    # -- semantics live in common.check_apply_pca)
    X_predict = common.check_apply_pca(X_predict)

    # perform all defined predictions: one serialized model per matching file
    for model_full_file in pathlib.Path(common.model_directory()).glob('*' + common.model_suffix):
        model_file = os.path.basename(model_full_file)
        # Model name = file name with the shared suffix stripped.
        model_name = model_file[:-len(common.model_suffix)]
        log_verbose(' Retrieving prediction model based on ' + model_name
                    + ' from file: ' + str(model_full_file))
        if model_name in common.one_hot_encoded:
            # One-hot-encoded targets need a different decoding path; skip for now.
            print(model_name + ' currently not supported')
        else:
            clf = load(model_full_file)
            # Appends this model's predictions as a new column on input_df.
            input_df = common.append_predictions(clf, X_predict, model_name, input_df)

    st.write(" ")
    # Report the majority class predicted by the RandomForestClassifier.
    # Guard the column lookup: if no RandomForestClassifier model file was
    # found above, the column does not exist and indexing would raise KeyError.
    if 'RandomForestClassifier' in input_df.columns:
        # Most frequent entry in the prediction column (ties broken by
        # value_counts ordering).
        most_frequent_entry = input_df['RandomForestClassifier'].value_counts().idxmax()
        # BUGFIX: this summary was previously commented out (and truncated),
        # so the classification result was computed but never displayed.
        st.markdown(f"Dataset classified as: **{most_frequent_entry}**")
    else:
        st.write('No RandomForestClassifier predictions available')