nethke0009 commited on
Commit
9e9465a
1 Parent(s): fdef2dc

Upload 4 files

Browse files
Files changed (4) hide show
  1. Bank_Telemarketing.csv +0 -0
  2. app.py +124 -0
  3. requirements.txt +1 -0
  4. train.py +80 -0
Bank_Telemarketing.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import uuid
import joblib
import json

import gradio as gr
import pandas as pd

from huggingface_hub import CommitScheduler
from pathlib import Path


# Run training at startup so model.joblib exists before it is loaded below.
os.system("python train.py")

# Fitted sklearn pipeline serialized by train.py.
term_deposit_predictor = joblib.load('model.joblib')

# Each app process logs to its own uniquely named JSON-lines file under logs/.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent

# Periodically commit the local logs folder into the "data/" path of a
# Hugging Face dataset repo. NOTE(review): `every=2` is presumably minutes
# per the CommitScheduler API — confirm against huggingface_hub docs.
scheduler = CommitScheduler(
    repo_id="term-deposit-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2
)
# --- Gradio input widgets: one per model feature.
# Labels mirror the column names used in train.py's feature lists.
age_input = gr.Number(label="Age")
duration_input = gr.Number(label='Duration(Sec)')
cc_contact_freq_input = gr.Number(label='CC Contact Freq')
days_since_pc_input = gr.Number(label='Days Since PC')
pc_contact_freq_input = gr.Number(label='Pc Contact Freq')
job_input = gr.Dropdown(['admin.', 'blue-collar', 'technician', 'services', 'management',
                         'retired', 'entrepreneur', 'self-employed', 'housemaid', 'unemployed',
                         'student', 'unknown'],label="Job")
marital_input = gr.Dropdown(['married', 'single', 'divorced', 'unknown'],label='Marital Status')
education_input = gr.Dropdown(['experience', 'university degree', 'high school', 'professional.course',
                               'Others', 'illiterate'],label='Education')
defaulter_input = gr.Dropdown(['no', 'unknown', 'yes'],label='Defaulter')
home_loan_input = gr.Dropdown(['yes', 'no', 'unknown'],label='Home Loan')
personal_loan_input = gr.Dropdown(['yes', 'no', 'unknown'],label='Personal Loan')
communication_type_input = gr.Dropdown(['cellular', 'telephone'],label='Communication Type')
last_contacted_input = gr.Dropdown(['may', 'jul', 'aug', 'jun', 'nov', 'apr', 'oct', 'mar', 'sep', 'dec'],label='Last Contacted')
day_of_week_input = gr.Dropdown(['thu', 'mon', 'wed', 'tue', 'fri'],label='Day of Week')
pc_outcome_input = gr.Dropdown(['nonexistent', 'failure', 'success'], label='PC Outcome')


# Output widget: displays the predicted "Subscribed" label.
model_output = gr.Label(label="Subscribed")
def predict_term_deposit(age, duration, cc_contact_freq, days_since_pc, pc_contact_freq, job, marital_status, education,
                         defaulter, home_loan, personal_loan, communication_type, last_contacted,
                         day_of_week, pc_outcome):
    """Predict whether a client will subscribe to a term deposit.

    Builds a one-row DataFrame whose column names match the feature names
    used in train.py, runs the serialized pipeline on it, appends the
    inputs plus the prediction as one JSON line to ``log_file`` (under the
    CommitScheduler's lock so a background commit never sees a partial
    write), and returns the predicted label.
    """
    sample = {
        'Age': age,
        'Duration(Sec)': duration,
        'CC Contact Freq': cc_contact_freq,
        'Days Since PC': days_since_pc,
        'PC Contact Freq': pc_contact_freq,
        'Job': job,
        'Marital Status': marital_status,
        'Education': education,
        'Defaulter': defaulter,
        'Home Loan': home_loan,
        'Personal Loan': personal_loan,
        'Communication Type': communication_type,
        'Last Contacted': last_contacted,
        'Day of Week': day_of_week,
        'PC Outcome': pc_outcome,
    }
    data_point = pd.DataFrame([sample])
    prediction = term_deposit_predictor.predict(data_point).tolist()

    # Reuse `sample` for the log record instead of re-typing the dict: the
    # original duplicated all 15 keys and logged the month under
    # 'Last Month Contacted', inconsistent with the 'Last Contacted'
    # feature name used everywhere else (which would break downstream
    # retraining on the logged data).
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps({**sample, 'prediction': prediction[0]}))
            f.write("\n")

    return prediction[0]
# Wire the widgets to the prediction function. Input order must match the
# positional parameters of predict_term_deposit exactly.
demo = gr.Interface(
    fn=predict_term_deposit,
    inputs=[age_input,
            duration_input,
            cc_contact_freq_input,
            days_since_pc_input,
            pc_contact_freq_input,
            job_input,
            marital_input,
            education_input,
            defaulter_input,
            home_loan_input,
            personal_loan_input,
            communication_type_input,
            last_contacted_input,
            day_of_week_input,
            pc_outcome_input],
    outputs=model_output,
    title="Term Deposit Prediction",
    # Fixed ungrammatical user-facing description ("predict the person who
    # are going to likely subscribe").
    description="This API predicts whether a person is likely to subscribe to the term deposit",
    allow_flagging="auto",
    concurrency_limit=10
)

# Enable the request queue, then serve locally (no public share link).
demo.queue()
demo.launch(share=False)
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ scikit-learn==1.4.2
train.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import joblib
3
+ import pandas as pd
4
+
5
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
6
+ from sklearn.compose import make_column_transformer
7
+ from sklearn.impute import SimpleImputer
8
+ from sklearn.pipeline import Pipeline
9
+ from sklearn.pipeline import make_pipeline
10
+
11
+ from sklearn.model_selection import train_test_split, RandomizedSearchCV
12
+
13
+ from sklearn.linear_model import LogisticRegression
14
+ from sklearn.metrics import accuracy_score, classification_report
15
+
# Load the raw telemarketing dataset from the repo root.
data_df = pd.read_csv("Bank_Telemarketing.csv")

# Target column and the feature columns fed to the model. These names must
# stay in sync with the dict keys built in app.py's predict function.
target = 'subscribed'
numerical_features = ['Age', 'Duration(Sec)', 'CC Contact Freq', 'Days Since PC','PC Contact Freq']
categorical_features = ['Job', 'Marital Status', 'Education', 'Defaulter', 'Home Loan',
                        'Personal Loan', 'Communication Type', 'Last Contacted', 'Day of Week',
                        'PC Outcome']

print("Creating data subsets")

X = data_df[numerical_features + categorical_features]
y = data_df[target]

# 80/20 split, seeded for reproducibility.
# NOTE(review): no stratify=y here — if 'subscribed' is imbalanced (typical
# for telemarketing data), a stratified split would be safer; confirm the
# class balance before changing.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)
# Numeric columns: fill missing values with the median, then standardize.
numeric_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
])

# Categorical columns: fill missing values with the mode, then one-hot
# encode; categories unseen at fit time are ignored rather than erroring.
categorical_transform = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Route each feature list through its matching transformer.
preprocessor = make_column_transformer(
    (numeric_transform, numerical_features),
    (categorical_transform, categorical_features),
)

print("Estimating Best Model Pipeline")

# Full pipeline: preprocessing followed by a logistic-regression classifier.
# make_pipeline names the final step "logisticregression", which the search
# space below relies on.
model_pipeline = make_pipeline(
    preprocessor,
    LogisticRegression(n_jobs=-1),
)

# Only the inverse regularization strength C is searched.
search_space = {
    "logisticregression__C": [0.001, 0.01, 0.1, 0.5, 1, 5, 10],
}

# Randomized search: 3 sampled candidates, 3-fold CV, seeded for
# reproducibility. Fitting happens later via rand_search_cv.fit(...).
rand_search_cv = RandomizedSearchCV(
    model_pipeline,
    search_space,
    n_iter=3,
    cv=3,
    random_state=42,
)
# Run the search: fits 3 sampled C values across 3 CV folds each.
rand_search_cv.fit(Xtrain, ytrain)

print("Logging Metrics")
# best_score_ is the mean cross-validated score of the best candidate.
print(f"Accuracy: {rand_search_cv.best_score_}")

print("Serializing Model")

# Persist the refit best pipeline; app.py loads it from this exact path.
model_path = "model.joblib"
joblib.dump(rand_search_cv.best_estimator_, model_path)