File size: 2,355 Bytes
4ec7aed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pandas as pd
from src.exception import CustomException
from src.logger import logging
from src.utils import load_object
import os
import sys

def load_model_and_scaler():
    """Load the persisted clustering model and feature scaler.

    Both objects are read with ``load_object`` from the relative
    ``artifacts`` directory: ``kmeans_model.pkl`` first, then ``scaler.pkl``.

    Returns:
        tuple: ``(model, scaler)`` in that order.

    Raises:
        CustomException: Wraps any exception raised while loading either
            artifact.
    """
    artifact_names = ('kmeans_model.pkl', 'scaler.pkl')
    try:
        # The list is built in order, so the model is loaded before the scaler
        # and a failure on the model stops before the scaler is touched.
        loaded = [
            load_object(file_path=os.path.join('artifacts', name))
            for name in artifact_names
        ]
        model, scaler = loaded
        return model, scaler
    except Exception as e:
        raise CustomException(e, sys)

def predict_cluster(age, work_experience, household_size, living_standards):
    """Predict the cluster for a single person.

    The living-standards category is replaced by a fixed income figure, the
    four features are scaled with the persisted scaler, and the persisted
    model predicts the cluster for that one row.

    Args:
        age: Value for the ``Age`` feature.
        work_experience: Value for the ``Work_Experience`` feature.
        household_size: Value for the ``Household_Size`` feature.
        living_standards: One of ``"Low"``, ``"Medium"`` or ``"High"``;
            converted to the ``Income`` feature.

    Returns:
        The first (and only) element of ``model.predict`` for the input row.

    Raises:
        CustomException: If ``living_standards`` is not a known category, or
            if loading the artifacts, scaling or predicting fails.
    """
    # Income figure substituted for each living-standards category.
    income_mapping = {"Low": 64207.0, "Medium": 77808.0, "High": 2485100.0}

    try:
        # Fail fast with a clear message: mapping an unknown category through
        # ``Series.map`` would silently yield NaN and only surface as an
        # obscure error (or a meaningless result) further down the pipeline.
        if living_standards not in income_mapping:
            raise ValueError(
                f"living_standards must be one of {sorted(income_mapping)}, "
                f"got {living_standards!r}"
            )

        model, scaler = load_model_and_scaler()

        # Column names and order are kept exactly as before so they line up
        # with whatever the scaler was fitted on.
        input_data = pd.DataFrame({
            'Age': [age],
            'Work_Experience': [work_experience],
            'Household_Size': [household_size],
            'Income': [income_mapping[living_standards]],
        })

        X_scaled = scaler.transform(input_data)
        predict = model.predict(X_scaled)

        return predict[0]

    except CustomException:
        # Already wrapped by load_model_and_scaler; re-raise untouched rather
        # than wrapping the wrapper a second time.
        raise
    except Exception as e:
        raise CustomException(e, sys)




# NOTE: the triple-quoted string below is a bare string expression, not live
# code -- nothing inside it is executed. It appears to be an earlier draft of
# this module: clustering() fits a StandardScaler and a 3-cluster KMeans on
# 'kmeans.csv', and predict_cluster() calls it on every prediction and then
# re-fits the scaler on the single input row before predicting.
'''

def clustering():

    # Load and map data

    data = pd.read_csv('kmeans.csv')



    # Standard Scaling 

    scaler = StandardScaler()

    X_scaled = scaler.fit_transform(data)



    # Apply K-means clustering

    kmeans = KMeans(n_clusters=3, random_state=0)

    clusters = kmeans.fit_predict(X_scaled)

    return kmeans, scaler, kmeans, clusters



def predict_cluster(age, work_experience, household_size, living_standards):



    # Set up the model and scaler

    model, scaler, kmeans, clusters = clustering()

    

    # Create the DataFrame using a dictionary

    input_data = pd.DataFrame({'age': age, 'work_experience': work_experience, 'household_size': household_size, 'living_standards': living_standards})



    # Define the mapping dictionary

    income_mapping = {"Low": 64207.0, "Medium": 77808.0, "High": 2485100.0}



    # Apply the mapping

    input_data['living_standards'] = input_data['living_standards'].map(income_mapping)



    X_scaled = scaler.fit_transform(input_data)

    predict = kmeans.predict(X_scaled)

    return predict[0]



'''