"All constants used in the project."

from pathlib import Path
import pandas

# The directory of this project
REPO_DIR = Path(__file__).parent

# Main necessary directories
DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"

# All deployment directories
APPROVAL_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "approval_model"
EXPLAIN_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "explain_model"

# Path targeting pre-processor saved files
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / "pre_processor_user.pkl"
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / "pre_processor_bank.pkl"
PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / "pre_processor_third_party.pkl"

# Create the necessary directories
FHE_KEYS.mkdir(exist_ok=True)
CLIENT_FILES.mkdir(exist_ok=True)
SERVER_FILES.mkdir(exist_ok=True)

# Store the server's URL
SERVER_URL = "http://localhost:8000/" 

# Path to data file
DATA_PATH = "data/data.csv"

# Development settings
APPROVAL_PROCESSED_INPUT_SHAPE = (1, 39)
EXPLAIN_PROCESSED_INPUT_SHAPE = (1, 38)

CLIENT_TYPES = ["user", "bank", "third_party"]
INPUT_INDEXES = {
    "user": 0,
    "bank": 1,
    "third_party": 2,
}
APPROVAL_INPUT_SLICES = {
    "user": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_user
    "third_party": slice(37, 39),  # Third position: start from n_feature_user + n_feature_bank
}
EXPLAIN_INPUT_SLICES = {
    "user": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_user
    "third_party": slice(37, 38),  # Third position: start from n_feature_user + n_feature_bank
}
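
# Illustrative sketch (an assumption, not taken from this project's code): with the combined
# pre-processed features laid out as a 2D array `x` of shape APPROVAL_PROCESSED_INPUT_SHAPE,
# each party's block could be recovered as, e.g., x[:, APPROVAL_INPUT_SLICES["bank"]].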

# Fixed column order used in the pre-processing steps
USER_COLUMNS = [
    "Own_car", "Own_property", "Mobile_phone", "Num_children", "Household_size",
    "Total_income", "Age", "Income_type", "Education_type", "Family_status", "Housing_type",
    "Occupation_type",
]
BANK_COLUMNS = ["Account_age"]
APPROVAL_THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]
EXPLAIN_THIRD_PARTY_COLUMNS = ["Employed"]

_data = pandas.read_csv(DATA_PATH, encoding="utf-8")

def get_min_max(data, column):
    """Get min/max values of a column in order to input them in Gradio's API as key arguments."""
    return {
        "minimum": int(data[column].min()),
        "maximum": int(data[column].max()), 
    }

# App data min and max values
ACCOUNT_MIN_MAX = get_min_max(_data, "Account_age")
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
AGE_MIN_MAX = get_min_max(_data, "Age")
EMPLOYED_MIN_MAX = get_min_max(_data, "Years_employed")
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
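
# Illustrative usage sketch (assumption, not part of the original file): the dictionaries
# above use the "minimum"/"maximum" keys expected by Gradio input components, so they can
# be unpacked directly, e.g.:
#   import gradio as gr
#   age_slider = gr.Slider(**AGE_MIN_MAX, step=1, label="Age")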

# App data choices 
INCOME_TYPES = list(_data["Income_type"].unique())
OCCUPATION_TYPES = list(_data["Occupation_type"].unique())
HOUSING_TYPES = list(_data["Housing_type"].unique())
EDUCATION_TYPES = list(_data["Education_type"].unique())
FAMILY_STATUS = list(_data["Family_status"].unique())