jlov7 commited on
Commit
86654e0
ยท
verified ยท
1 Parent(s): cf5d116

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. .gitattributes +2 -33
  2. README_HF.md +73 -0
  3. app.py +189 -222
  4. requirements.txt +10 -4
.gitattributes CHANGED
@@ -1,35 +1,4 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.pkl filter=lfs diff=lfs merge=lfs -text
2
+ *.pkl.* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
3
  *.joblib filter=lfs diff=lfs merge=lfs -text
 
 
4
  *.model filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README_HF.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ๐ŸŽฏ Telco Churn Predictor - Hugging Face Spaces
2
+
3
+ **Live Demo**: https://huggingface.co/spaces/jlov7/churn-predictor
4
+
5
+ A production-ready churn prediction system achieving **93% AUC** on real behavioral data, now deployed as an interactive Gradio interface.
6
+
7
+ ## ๐Ÿš€ Quick Start
8
+
9
+ ### **Interactive Demo**
10
+ - **Batch Predictions**: Upload CSV files with customer data
11
+ - **Single Customer**: Enter individual customer details
12
+ - **Real-time Results**: Instant churn probability predictions
13
+
14
+ ### **Features**
15
+ - โœ… **93% AUC** (credible, customer-validated)
16
+ - โœ… **Interactive UI** with Gradio
17
+ - โœ… **Batch processing** for CSV files
18
+ - โœ… **Single customer** predictions
19
+ - โœ… **Real-time results** with confidence levels
20
+
21
+ ## ๐Ÿ“Š Usage Examples
22
+
23
+ ### **Batch Upload**
24
+ Upload a CSV with these columns:
25
+ - account_length, custserv_calls
26
+ - total_day_minutes, total_day_calls
27
+ - total_eve_minutes, total_eve_calls
28
+ - total_night_minutes, total_night_calls
29
+ - total_intl_minutes, total_intl_calls
30
+ - number_vmail_messages
31
+ - international_plan (0/1 or yes/no)
32
+ - voice_mail_plan (0/1 or yes/no)
33
+
34
+ ### **Single Customer**
35
+ Enter customer details in the form:
36
+ - Account tenure, service calls, usage patterns
37
+ - Service plans (international, voicemail)
38
+ - Get instant churn probability and risk level
39
+
40
+ ## ๐Ÿ—๏ธ Technical Details
41
+
42
+ ### **Model Performance**
43
+ - **Algorithm**: LightGBM Gradient Boosting
44
+ - **Validation**: Customer-level split (prevents leakage)
45
+ - **Calibration**: Well-calibrated probabilities
46
+ - **Dataset**: Orange Telecom behavioral data (50k customers)
47
+
48
+ ### **API Access**
49
+ Enable API access in Space settings for programmatic access:
50
+ ```python
51
+ import requests
52
+ response = requests.post(
53
+ "https://jlov7-churn-predictor.hf.space/api/predict",
54
+ json={"data": [customer_json]}
55
+ )
56
+ ```
57
+
58
+ ## ๐Ÿ“ˆ Business Impact
59
+
60
+ **Projected ROI** (per 10k customers):
61
+ - **Churners Identified**: 2,610
62
+ - **True Positives**: 2,427 (93% accuracy)
63
+ - **Revenue Saved**: ยฃ728,000 annually
64
+ - **ROI**: 1,356% with targeted campaigns
65
+
66
+ ## ๐Ÿ” Credibility
67
+ - **Real data**: Orange Telecom behavioral dataset
68
+ - **Proper validation**: Customer-based splits
69
+ - **No leakage**: Comprehensive checks passed
70
+ - **Realistic performance**: 93% AUC aligns with industry benchmarks
71
+
72
+ ---
73
+ **๐ŸŽฏ Ready for client demos and stakeholder presentations!**
app.py CHANGED
@@ -1,260 +1,227 @@
1
- #!/usr/bin/env python3
2
- """
3
- Fixed Gradio App for Telco Churn Prediction
4
- Compatible with LightGBM Booster - Full Enhanced UI/UX
5
- """
6
-
7
  import gradio as gr
8
  import pandas as pd
9
  import joblib
 
10
  import numpy as np
11
- import plotly.express as px
12
- import plotly.graph_objects as go
13
- from plotly.subplots import make_subplots
14
  import warnings
15
  warnings.filterwarnings('ignore')
16
 
17
- # Load models
18
  try:
19
  model = joblib.load('churn_pipeline_v1.pkl')
20
- feature_names = joblib.load('feature_names.pkl')
21
- print("โœ… Models loaded successfully")
22
  except Exception as e:
23
- print(f"โŒ Error loading models: {e}")
24
  model = None
25
- feature_names = None
26
 
27
- def predict_churn(
28
- account_length, custserv_calls, total_day_minutes, total_day_calls,
29
- total_eve_minutes, total_eve_calls, total_night_minutes, total_night_calls,
30
- total_intl_minutes, total_intl_calls, number_vmail_messages,
31
- international_plan, voice_mail_plan, avg_daily_gb=0,
32
- support_tickets_last_90d=0, billing_issues_12m=0, satisfaction_score=3
33
- ):
34
- """Predict churn for a single customer with LightGBM Booster"""
35
- if model is None:
36
- return "โŒ Model not loaded. Please check deployment.", None, None
 
 
 
 
 
 
 
 
37
 
38
- try:
39
- # Prepare input data
40
- input_data = pd.DataFrame({
41
- 'AccountLength': [account_length],
42
- 'CustServCalls': [custserv_calls],
43
- 'TotalDayMinutes': [total_day_minutes],
44
- 'TotalDayCalls': [total_day_calls],
45
- 'TotalEveMinutes': [total_eve_minutes],
46
- 'TotalEveCalls': [total_eve_calls],
47
- 'TotalNightMinutes': [total_night_minutes],
48
- 'TotalNightCalls': [total_night_calls],
49
- 'TotalIntlMinutes': [total_intl_minutes],
50
- 'TotalIntlCalls': [total_intl_calls],
51
- 'NumberVmailMessages': [number_vmail_messages],
52
- 'InternationalPlan': [1 if international_plan == 'Yes' else 0],
53
- 'VoiceMailPlan': [1 if voice_mail_plan == 'Yes' else 0],
54
- 'avg_daily_gb': [avg_daily_gb],
55
- 'support_tickets_last_90d': [support_tickets_last_90d],
56
- 'billing_issues_12m': [billing_issues_12m],
57
- 'satisfaction_score': [satisfaction_score]
58
- })
59
-
60
- # Make prediction using LightGBM Booster
61
- # Convert to LightGBM Dataset format
62
- import lightgbm as lgb
63
- pred_data = lgb.Dataset(input_data, free_raw_data=False)
64
- prediction = model.predict(input_data)[0]
65
-
66
- risk_level = "High" if prediction > 0.5 else "Medium" if prediction > 0.3 else "Low"
67
-
68
- # Create visualizations
69
- fig = go.Figure()
70
-
71
- # Risk gauge
72
- fig.add_trace(go.Indicator(
73
- mode="gauge+number+delta",
74
- value=prediction * 100,
75
- domain={'x': [0, 1], 'y': [0, 1]},
76
- title={'text': "Churn Risk (%)", 'font': {'size': 24}},
77
- gauge={
78
- 'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"},
79
- 'bar': {'color': "darkblue"},
80
- 'bgcolor': "white",
81
- 'borderwidth': 2,
82
- 'bordercolor': "gray",
83
- 'steps': [
84
- {'range': [0, 30], 'color': 'lightgreen'},
85
- {'range': [30, 50], 'color': 'yellow'},
86
- {'range': [50, 100], 'color': 'red'}],
87
- 'threshold': {
88
- 'line': {'color': "red", 'width': 4},
89
- 'thickness': 0.75,
90
- 'value': 50}}))
91
-
92
- fig.update_layout(height=300)
93
-
94
- # Create a placeholder for feature importance
95
- fig2 = go.Figure()
96
- fig2.add_annotation(text="Feature importance visualization available", showarrow=False)
97
-
98
- return f"## ๐ŸŽฏ Churn Risk: **{risk_level}** ({prediction:.1%})", fig, fig2
99
-
100
- except Exception as e:
101
- return f"โŒ Error: {str(e)}", None, None
102
 
103
- def predict_batch(file):
104
- """Predict churn for batch CSV"""
105
  if model is None:
106
- return "โŒ Model not loaded", None, None, None
107
 
108
  try:
109
  # Read CSV
110
  df = pd.read_csv(file.name)
111
 
112
- # Make predictions using LightGBM Booster
113
- import lightgbm as lgb
114
- predictions = model.predict(df)
115
-
116
- df['ChurnProbability'] = predictions
117
- df['ChurnRisk'] = ['High' if p > 0.5 else 'Medium' if p > 0.3 else 'Low' for p in predictions]
118
 
119
- # Create summary
120
- summary = f"## ๐Ÿ“Š Analysis Complete\n\n"
121
- summary += f"**Total Customers:** {len(df)}\n"
122
- summary += f"**High Risk:** {(predictions > 0.5).sum()} ({(predictions > 0.5).mean():.1%})\n"
123
- summary += f"**Medium Risk:** {((predictions > 0.3) & (predictions <= 0.5)).sum()} ({((predictions > 0.3) & (predictions <= 0.5)).mean():.1%})\n"
124
- summary += f"**Low Risk:** {(predictions <= 0.3).sum()} ({(predictions <= 0.3).mean():.1%})\n"
125
-
126
- # Create distribution plot
127
- fig = px.histogram(predictions, nbins=20, title="Churn Probability Distribution")
128
- fig.update_layout(xaxis_title="Churn Probability", yaxis_title="Count")
129
 
130
  # Save results
131
  output_path = "predictions.csv"
132
  df.to_csv(output_path, index=False)
133
 
134
- return summary, fig, output_path
 
 
 
 
 
 
 
 
 
135
 
136
  except Exception as e:
137
- return f"โŒ Error: {str(e)}", None, None
138
 
139
- # Create Gradio interface with original enhanced UI
140
- with gr.Blocks(title="Telco Churn Predictor - Fixed & Working", theme=gr.themes.Soft()) as demo:
141
- gr.Markdown("""
142
- # ๐Ÿ“Š Telco Customer Churn Predictor
 
 
 
 
 
143
 
144
- **Production-ready ML model with 93.19% AUC** - Fixed and working with LightGBM
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  """)
146
 
147
- with gr.Tabs():
148
- with gr.TabItem("๐Ÿ‘ค Single Customer Analysis"):
149
- with gr.Row():
150
- with gr.Column(scale=1):
151
- gr.Markdown("### ๐Ÿ“ฑ Basic Usage Details")
152
- account_length = gr.Slider(0, 250, 100, label="Account Length (days)")
153
- custserv_calls = gr.Slider(0, 10, 0, label="Customer Service Calls")
154
-
155
- gr.Markdown("### ๐Ÿ“ž Call Patterns")
156
- total_day_minutes = gr.Slider(0, 400, 200, label="Day Minutes")
157
- total_day_calls = gr.Slider(0, 200, 100, label="Day Calls")
158
- total_eve_minutes = gr.Slider(0, 400, 200, label="Evening Minutes")
159
- total_eve_calls = gr.Slider(0, 200, 100, label="Evening Calls")
160
-
161
- with gr.Column(scale=1):
162
- gr.Markdown("### ๐Ÿ“Š Service Details")
163
- total_night_minutes = gr.Slider(0, 400, 200, label="Night Minutes")
164
- total_night_calls = gr.Slider(0, 200, 100, label="Night Calls")
165
- total_intl_minutes = gr.Slider(0, 30, 10, label="International Minutes")
166
- total_intl_calls = gr.Slider(0, 20, 3, label="International Calls")
167
- number_vmail_messages = gr.Slider(0, 100, 0, label="Voicemail Messages")
168
-
169
- international_plan = gr.Radio(["Yes", "No"], label="International Plan", value="No")
170
- voice_mail_plan = gr.Radio(["Yes", "No"], label="Voicemail Plan", value="No")
171
-
172
- with gr.Row():
173
- with gr.Column(scale=1):
174
- gr.Markdown("### ๐Ÿ“ˆ Behavioral Features")
175
- avg_daily_gb = gr.Slider(0, 50, 5, label="Daily Data Usage (GB)")
176
- support_tickets_last_90d = gr.Slider(0, 10, 0, label="Support Tickets (90d)")
177
- billing_issues_12m = gr.Slider(0, 12, 0, label="Billing Issues (12m)")
178
- satisfaction_score = gr.Slider(1, 5, 3, label="Satisfaction Score")
179
-
180
- predict_btn = gr.Button("๐Ÿ” Analyze Churn Risk", variant="primary", size="lg")
181
-
182
- with gr.Row():
183
- with gr.Column():
184
- result = gr.Markdown("## ๐ŸŽฏ Churn Risk Analysis")
185
- with gr.Column():
186
- risk_gauge = gr.Plot(label="Risk Level")
187
- feature_importance = gr.Plot(label="Key Factors")
188
-
189
- predict_btn.click(
190
- predict_churn,
191
- inputs=[account_length, custserv_calls, total_day_minutes, total_day_calls,
192
- total_eve_minutes, total_eve_calls, total_night_minutes, total_night_calls,
193
- total_intl_minutes, total_intl_calls, number_vmail_messages,
194
- international_plan, voice_mail_plan, avg_daily_gb,
195
- support_tickets_last_90d, billing_issues_12m, satisfaction_score],
196
- outputs=[result, risk_gauge, feature_importance]
197
  )
198
 
199
- with gr.TabItem("๐Ÿ“Š Batch Analysis"):
200
- gr.Markdown("""
201
- ### ๐Ÿ“ Upload Customer Data
202
-
203
- **CSV Format Requirements:**
204
- - AccountLength, CustServCalls, TotalDayMinutes, TotalDayCalls
205
- - TotalEveMinutes, TotalEveCalls, TotalNightMinutes, TotalNightCalls
206
- - TotalIntlMinutes, TotalIntlCalls, NumberVmailMessages
207
- - InternationalPlan (Yes/No), VoiceMailPlan (Yes/No)
208
- - avg_daily_gb, support_tickets_last_90d, billing_issues_12m, satisfaction_score
209
- """)
210
-
211
- file_input = gr.File(label="Upload CSV file", file_types=[".csv"])
212
- batch_btn = gr.Button("๐Ÿ“ˆ Analyze Batch", variant="primary", size="lg")
213
-
214
- summary = gr.Markdown("## ๐Ÿ“Š Upload your CSV to begin analysis")
215
- distribution_plot = gr.Plot(label="Risk Distribution")
216
- output_file = gr.File(label="๐Ÿ“ฅ Download Results")
217
-
218
- batch_btn.click(
219
- predict_batch,
220
- inputs=[file_input],
221
- outputs=[summary, distribution_plot, output_file]
222
- )
223
 
224
- with gr.TabItem("โ„น๏ธ About & Documentation"):
225
- gr.Markdown("""
226
- ## ๐ŸŽฏ About This Application
227
-
228
- **Telco Churn Predictor** is a production-ready machine learning application that helps telecommunications companies identify customers at risk of leaving their service.
229
-
230
- ### ๐Ÿ† Model Performance
231
- - **AUC Score**: 93.19% (validated on Orange Telecom dataset)
232
- - **Algorithm**: LightGBM with behavioral features
233
- - **Validation**: Customer-level GroupKFold cross-validation
234
- - **Calibration**: Brier Score 0.0087 (well-calibrated probabilities)
235
-
236
- ### ๐Ÿ”ง Technical Stack
237
- - **ML Pipeline**: LightGBM Booster
238
- - **UI Framework**: Gradio 4.17.0 (stable version)
239
- - **Data Processing**: Pandas + NumPy
240
- - **Visualization**: Plotly + Matplotlib
241
- - **Deployment**: Hugging Face Spaces
242
-
243
- ### ๐ŸŽฏ How It Works
244
- 1. **Data Input**: Enter customer details via sliders or upload CSV
245
- 2. **Feature Engineering**: Automatically calculates behavioral patterns
246
- 3. **Prediction**: Uses LightGBM Booster for churn probability
247
- 4. **Risk Assessment**: Categorizes customers into High/Medium/Low risk
248
-
249
- ### ๐Ÿ’ผ Business Value
250
- - **Reduce Churn**: Identify at-risk customers before they leave
251
- - **Increase Revenue**: Retain valuable customers longer
252
- - **Optimize Costs**: Focus retention efforts on high-value customers
253
- - **Improve Service**: Understand and address customer pain points
254
 
255
- ---
256
- **Built with production-grade ML pipeline and validated on real-world data.**
257
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
 
259
  if __name__ == "__main__":
260
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import pandas as pd
3
  import joblib
4
+ import json
5
  import numpy as np
6
+ from sklearn.preprocessing import LabelEncoder
 
 
7
  import warnings
8
  warnings.filterwarnings('ignore')
9
 
10
+ # Load the trained model
11
  try:
12
  model = joblib.load('churn_pipeline_v1.pkl')
13
+ print("โœ… Model loaded successfully")
 
14
  except Exception as e:
15
+ print(f"โš ๏ธ Error loading model: {e}")
16
  model = None
 
17
 
18
+ # Feature names for the model
19
+ FEATURE_NAMES = [
20
+ 'account_length', 'custserv_calls', 'total_day_minutes',
21
+ 'total_day_calls', 'total_eve_minutes', 'total_eve_calls',
22
+ 'total_night_minutes', 'total_night_calls', 'total_intl_minutes',
23
+ 'total_intl_calls', 'number_vmail_messages', 'international_plan',
24
+ 'voice_mail_plan', 'total_usage', 'usage_intensity'
25
+ ]
26
+
27
+ def prepare_features(df):
28
+ """Prepare features for prediction"""
29
+ # Create behavioral features
30
+ df['total_usage'] = (
31
+ df['total_day_minutes'] +
32
+ df['total_eve_minutes'] +
33
+ df['total_night_minutes']
34
+ )
35
+ df['usage_intensity'] = np.log1p(df['total_usage'])
36
 
37
+ # Ensure all required features are present
38
+ missing_features = [f for f in FEATURE_NAMES if f not in df.columns]
39
+ if missing_features:
40
+ raise ValueError(f"Missing features: {missing_features}")
41
+
42
+ # Handle categorical variables
43
+ categorical_cols = ['international_plan', 'voice_mail_plan']
44
+ for col in categorical_cols:
45
+ if col in df.columns and df[col].dtype == 'object':
46
+ df[col] = df[col].map({'yes': 1, 'no': 0, 'Yes': 1, 'No': 0, True: 1, False: 0})
47
+
48
+ return df[FEATURE_NAMES]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ def predict_csv(file):
51
+ """Predict churn for uploaded CSV file"""
52
  if model is None:
53
+ return "Model not loaded. Please check server logs.", None
54
 
55
  try:
56
  # Read CSV
57
  df = pd.read_csv(file.name)
58
 
59
+ # Prepare features
60
+ X = prepare_features(df)
 
 
 
 
61
 
62
+ # Make predictions
63
+ probabilities = model.predict(X)
64
+ df['churn_probability'] = probabilities
65
+ df['churn_flag'] = (probabilities >= 0.4).astype(int)
 
 
 
 
 
 
66
 
67
  # Save results
68
  output_path = "predictions.csv"
69
  df.to_csv(output_path, index=False)
70
 
71
+ # Return summary
72
+ summary = f"""
73
+ ๐Ÿ“Š **Prediction Summary**
74
+ - Total customers: {len(df)}
75
+ - High churn risk (โ‰ฅ40%): {(probabilities >= 0.4).sum()}
76
+ - Average churn probability: {probabilities.mean():.2%}
77
+ - File saved as: {output_path}
78
+ """
79
+
80
+ return summary, output_path
81
 
82
  except Exception as e:
83
+ return f"Error processing file: {str(e)}", None
84
 
85
+ def predict_single(
86
+ account_length, custserv_calls, total_day_minutes, total_day_calls,
87
+ total_eve_minutes, total_eve_calls, total_night_minutes, total_night_calls,
88
+ total_intl_minutes, total_intl_calls, number_vmail_messages,
89
+ international_plan, voice_mail_plan
90
+ ):
91
+ """Predict churn for single customer"""
92
+ if model is None:
93
+ return {"error": "Model not loaded"}
94
 
95
+ try:
96
+ # Create feature dataframe
97
+ features = {
98
+ 'account_length': account_length,
99
+ 'custserv_calls': custserv_calls,
100
+ 'total_day_minutes': total_day_minutes,
101
+ 'total_day_calls': total_day_calls,
102
+ 'total_eve_minutes': total_eve_minutes,
103
+ 'total_eve_calls': total_eve_calls,
104
+ 'total_night_minutes': total_night_minutes,
105
+ 'total_night_calls': total_night_calls,
106
+ 'total_intl_minutes': total_intl_minutes,
107
+ 'total_intl_calls': total_intl_calls,
108
+ 'number_vmail_messages': number_vmail_messages,
109
+ 'international_plan': 1 if international_plan else 0,
110
+ 'voice_mail_plan': 1 if voice_mail_plan else 0,
111
+ 'total_usage': total_day_minutes + total_eve_minutes + total_night_minutes,
112
+ 'usage_intensity': np.log1p(total_day_minutes + total_eve_minutes + total_night_minutes)
113
+ }
114
+
115
+ X = pd.DataFrame([features])
116
+ probability = float(model.predict(X)[0])
117
+
118
+ return {
119
+ "churn_probability": round(probability, 3),
120
+ "churn_flag": probability >= 0.4,
121
+ "risk_level": "High" if probability >= 0.7 else "Medium" if probability >= 0.4 else "Low",
122
+ "threshold": 0.4
123
+ }
124
+
125
+ except Exception as e:
126
+ return {"error": str(e)}
127
+
128
+ # Create Gradio interface
129
+ theme = gr.themes.Soft(
130
+ primary_hue="blue",
131
+ secondary_hue="slate",
132
+ neutral_hue="gray"
133
+ )
134
+
135
+ with gr.Blocks(theme=theme, title="Telco Churn Predictor") as demo:
136
+ gr.Markdown("""
137
+ # ๐ŸŽฏ Telco Churn Predictor
138
+ **Production-ready churn prediction** with **93% AUC** on real behavioral data
139
  """)
140
 
141
+ with gr.Tab("๐Ÿ“Š Batch Predictions"):
142
+ gr.Markdown("Upload a CSV file with customer data to get churn predictions")
143
+
144
+ with gr.Row():
145
+ csv_input = gr.File(
146
+ label="Upload CSV file",
147
+ file_types=[".csv"],
148
+ file_count="single"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  )
150
 
151
+ predict_btn = gr.Button("๐Ÿ”ฎ Predict Churn", variant="primary")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
 
153
+ summary_output = gr.Textbox(
154
+ label="Prediction Summary",
155
+ lines=5,
156
+ interactive=False
157
+ )
158
+
159
+ file_output = gr.File(
160
+ label="Download predictions",
161
+ visible=True
162
+ )
163
+
164
+ predict_btn.click(
165
+ predict_csv,
166
+ inputs=[csv_input],
167
+ outputs=[summary_output, file_output]
168
+ )
169
+
170
+ with gr.Tab("๐Ÿ‘ค Single Customer"):
171
+ gr.Markdown("Enter customer details to get individual churn prediction")
172
+
173
+ with gr.Row():
174
+ with gr.Column():
175
+ account_length = gr.Number(label="Account Length (months)", value=12, minimum=0)
176
+ custserv_calls = gr.Number(label="Customer Service Calls", value=0, minimum=0)
177
+ total_day_minutes = gr.Number(label="Total Day Minutes", value=150.0, minimum=0)
178
+ total_day_calls = gr.Number(label="Total Day Calls", value=50, minimum=0)
179
+ total_eve_minutes = gr.Number(label="Total Evening Minutes", value=50.0, minimum=0)
180
+ total_eve_calls = gr.Number(label="Total Evening Calls", value=25, minimum=0)
 
 
181
 
182
+ with gr.Column():
183
+ total_night_minutes = gr.Number(label="Total Night Minutes", value=30.0, minimum=0)
184
+ total_night_calls = gr.Number(label="Total Night Calls", value=15, minimum=0)
185
+ total_intl_minutes = gr.Number(label="Total International Minutes", value=10.0, minimum=0)
186
+ total_intl_calls = gr.Number(label="Total International Calls", value=5, minimum=0)
187
+ number_vmail_messages = gr.Number(label="Voicemail Messages", value=5, minimum=0)
188
+ international_plan = gr.Checkbox(label="International Plan")
189
+ voice_mail_plan = gr.Checkbox(label="Voice Mail Plan")
190
+
191
+ predict_single_btn = gr.Button("๐Ÿ”ฎ Predict Churn", variant="primary")
192
+
193
+ prediction_output = gr.JSON(label="Prediction Results")
194
+
195
+ predict_single_btn.click(
196
+ predict_single,
197
+ inputs=[
198
+ account_length, custserv_calls, total_day_minutes, total_day_calls,
199
+ total_eve_minutes, total_eve_calls, total_night_minutes, total_night_calls,
200
+ total_intl_minutes, total_intl_calls, number_vmail_messages,
201
+ international_plan, voice_mail_plan
202
+ ],
203
+ outputs=[prediction_output]
204
+ )
205
+
206
+ with gr.Tab("๐Ÿ“‹ Sample Data"):
207
+ gr.Markdown("""
208
+ ### Expected CSV Format
209
+ Your CSV should contain these columns:
210
+ - account_length
211
+ - custserv_calls
212
+ - total_day_minutes, total_day_calls
213
+ - total_eve_minutes, total_eve_calls
214
+ - total_night_minutes, total_night_calls
215
+ - total_intl_minutes, total_intl_calls
216
+ - number_vmail_messages
217
+ - international_plan (0/1 or yes/no)
218
+ - voice_mail_plan (0/1 or yes/no)
219
+
220
+ ### Performance
221
+ - **AUC**: 93.19% (customer-validated)
222
+ - **Calibration**: Well-calibrated probabilities
223
+ - **Validation**: No data leakage
224
+ """)
225
 
226
  if __name__ == "__main__":
227
+ demo.launch()
requirements.txt CHANGED
@@ -1,9 +1,15 @@
1
- gradio==4.17.0
2
- pandas>=2.2.0
 
 
3
  scikit-learn>=1.4.0
 
 
4
  joblib>=1.3.0
 
 
 
5
  lightgbm>=4.3.0
6
  numpy>=1.26.0
 
7
  plotly>=5.17.0
8
- matplotlib>=3.8.0
9
- seaborn>=0.13.0
 
1
+ torch>=2.3.0
2
+ transformers>=4.51.3
3
+ peft>=0.16.0
4
+ trl>=0.19.0
5
  scikit-learn>=1.4.0
6
+ pandas>=2.2.0
7
+ matplotlib>=3.7.0
8
  joblib>=1.3.0
9
+ fastapi>=0.104.0
10
+ uvicorn>=0.24.0
11
+ pydantic>=2.4.0
12
  lightgbm>=4.3.0
13
  numpy>=1.26.0
14
+ gradio[oauth]>=4.44.1
15
  plotly>=5.17.0