Spaces:
Sleeping
Sleeping
File size: 6,203 Bytes
b502702 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# +++
import os
import uuid
import joblib
import json
# IMPORTANT: I already installed the package "gradio" in my current Virtual Environment (VEnvDSDIL_gpu_Py3.12) as: pip install -q gradio_client
# Do NOT install "gradio_client" package again in Anaconda otherwise it will mess up the package.
import gradio as gr
import pandas as pd
# must install the package "huggingface_hub" first in the current python Virtual Environment, with pip, not with conda, as follows
# pip install huggingface_hub
# i.e., in the command line interface within the activated Virtual Environment:
# (VEnvDSDIL_gpu_Py3.12) epalvarez@DSDILmStation01:~ $ pip install huggingface_hub
from huggingface_hub import CommitScheduler
from pathlib import Path
# path = Path.cwd()
# Each process writes to its own uniquely named JSON-lines file so that
# concurrent app instances never append to the same log file.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

hf_token = os.environ.get('HF_TOKEN')
# Never print the token itself: stdout ends up in the application logs, which
# would expose the secret. Only report whether it is configured.
print(f"HF_TOKEN configured: {hf_token is not None}")

# The scheduler uploads the contents of `log_folder` to the dataset repo in the
# background. `every` is expressed in MINUTES (not in number of API calls), so
# a commit is attempted every 2 minutes.
scheduler = CommitScheduler(
    repo_id="term-deposit-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=hf_token,  # None falls back to the library's default token lookup
)

# NOTE: joblib.load unpickles the file and can execute arbitrary code, so
# 'model_bt.joblib' must always come from a trusted source.
term_deposit_predictor = joblib.load('model_bt.joblib')
# --- Numeric inputs ----------------------------------------------------------
# The labels are identical to the feature names used as keys of the sample
# dictionary built in predict_term_deposit.
age_input = gr.Number(label="Age")
duration_input = gr.Number(label="Duration(Sec)")
cc_contact_freq_input = gr.Number(label="CC Contact Freq")
days_since_pc_input = gr.Number(label="Days Since PC")
pc_contact_freq_input = gr.Number(label="PC Contact Freq")

# --- Categorical inputs ------------------------------------------------------
# Each dropdown lists the category values offered to the user, in display order.
job_input = gr.Dropdown(
    choices=[
        "admin.",
        "blue-collar",
        "technician",
        "services",
        "management",
        "retired",
        "entrepreneur",
        "self-employed",
        "housemaid",
        "unemployed",
        "student",
        "unknown",
    ],
    label="Job",
)
marital_status_input = gr.Dropdown(
    choices=["married", "single", "divorced", "unknown"],
    label="Marital Status",
)
education_input = gr.Dropdown(
    choices=[
        "experience",
        "university degree",
        "high school",
        "professional.course",
        "Others",
        "illiterate",
    ],
    label="Education",
)
defaulter_input = gr.Dropdown(
    choices=["no", "unknown", "yes"],
    label="Defaulter",
)
home_loan_input = gr.Dropdown(
    choices=["yes", "no", "unknown"],
    label="Home Loan",
)
personal_loan_input = gr.Dropdown(
    choices=["yes", "no", "unknown"],
    label="Personal Loan",
)
communication_type_input = gr.Dropdown(
    choices=["cellular", "telephone"],
    label="Communication Type",
)
last_contacted_input = gr.Dropdown(
    choices=["mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"],
    label="Last Contacted",
)
day_of_week_input = gr.Dropdown(
    choices=["mon", "tue", "wed", "thu", "fri"],
    label="Day of Week",
)
pc_outcome_input = gr.Dropdown(
    choices=["nonexistent", "failure", "success"],
    label="PC Outcome",
)

# --- Output --------------------------------------------------------------------
# Displays the value returned by predict_term_deposit.
model_output = gr.Label(label="Subscribed")
# -------------------------------------------------------------------------------------------------------------------------------------------------------------
def predict_term_deposit(age, duration, cc_contact_freq, days_since_pc, pc_contact_freq, job, marital_status, education,
                         defaulter, home_loan, personal_loan, communication_type, last_contacted,
                         day_of_week, pc_outcome):
    """Run the term-deposit model on one set of inputs and log the call.

    The arguments are the values of the Gradio input components, in the same
    order as they are listed in the interface. They are assembled into a
    one-row DataFrame whose column names are the feature names below, and that
    row is passed to ``term_deposit_predictor.predict``.

    Every call appends one JSON line (the inputs plus a ``'prediction'`` key)
    to ``log_file``. The write happens while holding ``scheduler.lock`` so it
    cannot interleave with the scheduler's own use of that lock.

    Returns:
        The first element of the model's prediction for the submitted row.
    """
    features = {
        'Age': age,
        'Duration(Sec)': duration,
        'CC Contact Freq': cc_contact_freq,
        'Days Since PC': days_since_pc,
        'PC Contact Freq': pc_contact_freq,
        'Job': job,
        'Marital Status': marital_status,
        'Education': education,
        'Defaulter': defaulter,
        'Home Loan': home_loan,
        'Personal Loan': personal_loan,
        'Communication Type': communication_type,
        'Last Contacted': last_contacted,
        'Day of Week': day_of_week,
        'PC Outcome': pc_outcome,
    }

    single_row = pd.DataFrame([features])
    predictions = term_deposit_predictor.predict(single_row).tolist()

    # Append the record to the local log file; the background scheduler defined
    # at module level is what uploads the log folder to the dataset repo.
    with scheduler.lock, log_file.open("a") as log_handle:
        # Same keys, in the same order, as the feature row, plus the prediction.
        record = {**features, 'prediction': predictions[0]}
        log_handle.write(json.dumps(record) + "\n")

    return predictions[0]
# -------------------------------------------------------------------------------------------------------------------------------------------------------------
# Input components, listed in exactly the order of the parameters of
# predict_term_deposit (Gradio passes their values positionally).
input_components = [
    age_input,
    duration_input,
    cc_contact_freq_input,
    days_since_pc_input,
    pc_contact_freq_input,
    job_input,
    marital_status_input,
    education_input,
    defaulter_input,
    home_loan_input,
    personal_loan_input,
    communication_type_input,
    last_contacted_input,
    day_of_week_input,
    pc_outcome_input,
]

demo = gr.Interface(
    fn=predict_term_deposit,
    inputs=input_components,
    outputs=model_output,
    title="Term Deposit Prediction",
    description="This API allows you to predict the person who are going to likely subscribe to the term deposit",
    # "auto" flags every submission without showing a flag button.
    # NOTE(review): no flagging_callback is passed here, so this presumably
    # uses Gradio's default flag logger rather than pushing to a Hugging Face
    # dataset -- confirm. Dataset logging is done in predict_term_deposit.
    allow_flagging="auto",
    concurrency_limit=8,
)

demo.queue()

# share=False serves the app without creating a public share link. With
# share=True a public link is created, but when this script is run on a local
# machine that machine has to stay on for public users to reach the interface.
demo.launch(share=False)