File size: 6,203 Bytes
b502702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
# +++
import os
import uuid
import joblib
import json

# IMPORTANT: I already installed the package "gradio" in my current Virtual Environment (VEnvDSDIL_gpu_Py3.12) as:  pip install -q gradio_client
#            Do NOT install "gradio_client" package again in Anaconda otherwise it will mess up the package.
import gradio as gr
import pandas as pd

# must install the package "huggingface_hub" first in the current python Virtual Environment, with pip, not with conda, as follows
# pip install huggingface_hub
# i.e., in the command line interface within the activated Virtual Environment:
#  (VEnvDSDIL_gpu_Py3.12) epalvarez@DSDILmStation01:~ $ pip install huggingface_hub
from huggingface_hub import CommitScheduler
from pathlib import Path

# path = Path.cwd()

# Prediction log for this process. uuid4 gives every process start its own
# file name, so a restart never reuses the file name of a previous run.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Hugging Face access token taken from the environment (None when unset).
hf_token = os.environ.get('HF_TOKEN')
# SECURITY: never print the token itself -- stdout ends up in the process /
# Space logs. Only report whether a token is configured.
print(f"HF_TOKEN configured: {hf_token is not None}")

# CommitScheduler runs a background job that periodically uploads the content
# of `folder_path` to the dataset repo on the Hugging Face Hub.
# NOTE: `every` is a time interval in MINUTES (not a number of API calls), so
# the log folder is pushed every 2 minutes.
scheduler = CommitScheduler(
    repo_id="term-deposit-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    # Authenticate explicitly with the token read from HF_TOKEN; when it is
    # None the library falls back to its default credential lookup.
    token=hf_token,
)

# Load the serialized predictor once, at module import, so that every request
# handled by predict_term_deposit reuses the same object.
# The path is relative to the current working directory of the process.
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files that come from a trusted source.
term_deposit_predictor = joblib.load('model_bt.joblib')

# ---- Numeric input widgets ----
age_input = gr.Number(label="Age")
duration_input = gr.Number(label='Duration(Sec)')
cc_contact_freq_input = gr.Number(label='CC Contact Freq')
days_since_pc_input = gr.Number(label='Days Since PC')
pc_contact_freq_input = gr.Number(label='PC Contact Freq')

# ---- Categorical input widgets (each dropdown lists the selectable values) ----
job_input = gr.Dropdown(
    choices=[
        'admin.', 'blue-collar', 'technician', 'services', 'management',
        'retired', 'entrepreneur', 'self-employed', 'housemaid', 'unemployed',
        'student', 'unknown',
    ],
    label="Job",
)
marital_status_input = gr.Dropdown(
    choices=['married', 'single', 'divorced', 'unknown'],
    label='Marital Status',
)
education_input = gr.Dropdown(
    choices=[
        'experience', 'university degree', 'high school', 'professional.course',
        'Others', 'illiterate',
    ],
    label='Education',
)
defaulter_input = gr.Dropdown(choices=['no', 'unknown', 'yes'], label='Defaulter')
home_loan_input = gr.Dropdown(choices=['yes', 'no', 'unknown'], label='Home Loan')
personal_loan_input = gr.Dropdown(choices=['yes', 'no', 'unknown'], label='Personal Loan')
communication_type_input = gr.Dropdown(
    choices=['cellular', 'telephone'],
    label='Communication Type',
)
last_contacted_input = gr.Dropdown(
    choices=['mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'],
    label='Last Contacted',
)
day_of_week_input = gr.Dropdown(
    choices=['mon', 'tue', 'wed', 'thu', 'fri'],
    label='Day of Week',
)
pc_outcome_input = gr.Dropdown(
    choices=['nonexistent', 'failure', 'success'],
    label='PC Outcome',
)

# ---- Output widget: displays the value returned by the prediction function ----
model_output = gr.Label(label="Subscribed")

# -------------------------------------------------------------------------------------------------------------------------------------------------------------
def predict_term_deposit(age, duration, cc_contact_freq, days_since_pc, pc_contact_freq, job, marital_status,
                         education, defaulter, home_loan, personal_loan, communication_type, last_contacted,
                         day_of_week, pc_outcome):
    """Predict the term-deposit outcome for one customer and log the request.

    The arguments are the values of the Gradio input widgets, in order: five
    numeric features (age, duration, cc_contact_freq, days_since_pc,
    pc_contact_freq) followed by ten categorical features.

    Returns:
        The first (and only) element of the model's prediction for the
        single-row input.

    Side effects:
        Appends one JSON line (the input features plus the prediction) to
        ``log_file``.
    """
    # Single source of truth for the feature record: it is used both as the
    # model input and as the logged record, so the two can never drift apart.
    sample = {
        'Age': age,
        'Duration(Sec)': duration,
        'CC Contact Freq': cc_contact_freq,
        'Days Since PC': days_since_pc,
        'PC Contact Freq': pc_contact_freq,
        'Job': job,
        'Marital Status': marital_status,
        'Education': education,
        'Defaulter': defaulter,
        'Home Loan': home_loan,
        'Personal Loan': personal_loan,
        'Communication Type': communication_type,
        'Last Contacted': last_contacted,
        'Day of Week': day_of_week,
        'PC Outcome': pc_outcome,
    }
    data_point = pd.DataFrame([sample])
    # .tolist() converts the result to plain Python values so that the
    # prediction can be serialized with json.dumps below.
    prediction = term_deposit_predictor.predict(data_point).tolist()[0]

    # Every prediction is appended to the local log file; the scheduler
    # (configured outside this function) uploads the log folder to the dataset
    # repo in the background. Holding scheduler.lock while writing keeps an
    # upload from picking up a partially written line.
    record = {**sample, 'prediction': prediction}
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(record) + "\n")

    return prediction
# -------------------------------------------------------------------------------------------------------------------------------------------------------------

# Build the web UI. The input widgets are listed in the same order as the
# parameters of predict_term_deposit, since their values are passed to `fn`
# in list order.
demo = gr.Interface(
    fn=predict_term_deposit,
    inputs=[
        # numeric features
        age_input,
        duration_input,
        cc_contact_freq_input,
        days_since_pc_input,
        pc_contact_freq_input,
        # categorical features
        job_input,
        marital_status_input,
        education_input,
        defaulter_input,
        home_loan_input,
        personal_loan_input,
        communication_type_input,
        last_contacted_input,
        day_of_week_input,
        pc_outcome_input,
    ],
    outputs=model_output,
    title="Term Deposit Prediction",
    description="This API allows you to predict the person who are going to likely subscribe to the term deposit",
    # "auto": no Flag button is shown and every submission is flagged
    # automatically by Gradio. NOTE(review): uploading to the Hugging Face
    # dataset is done by the CommitScheduler, not by this option -- confirm
    # that the extra Gradio flagging log is actually wanted.
    allow_flagging="auto",
    concurrency_limit=8,
)

# Enable request queuing, then start the server.
demo.queue()
# share=False serves the app without a public share link. Setting share=True
# creates a public link, but when this script is run locally the local machine
# must stay on for public users to reach the interface.
demo.launch(share=False)