|
import os

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
|
|
|
|
|
# Application instance; the route decorators further down register on it.
app = FastAPI()

# Relative path: resolved against the working directory of the server process.
DATASET_PATH = "credit_risk_dataset.csv"

# Read once at import time; the analytics endpoints below query this frame.
df = pd.read_csv(DATASET_PATH)
|
|
|
|
|
# Columns fed to both estimators; this order is the column order at fit time.
FEATURES = [
    "loan_amnt",
    "loan_int_rate",
    "person_age",
    "person_income",
    "cb_person_cred_hist_length",
]

# Rows with a missing feature or label are left out of the training data:
# LogisticRegression.fit raises on NaN input, which would abort the import of
# this module. When the dataset has no gaps this selects exactly the same rows
# as using ``df`` directly, so the split below is unchanged in that case.
# ``df`` itself is not modified, so the analytics endpoints still see all rows.
_training_rows = df.dropna(subset=FEATURES + ["loan_status"])

X = _training_rows[FEATURES]
y = _training_rows["loan_status"]

# Fixed seed keeps the 80/20 split reproducible between restarts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
|
|
|
|
|
# Artifact locations (file names unchanged).
CLASSIFIER_PATH = "models/risk_classifier.pkl"
REGRESSOR_PATH = "models/past_due_regressor.pkl"

# joblib.dump does not create parent directories, so without this a checkout
# that lacks the models/ folder fails at import with FileNotFoundError.
os.makedirs(os.path.dirname(CLASSIFIER_PATH), exist_ok=True)
os.makedirs(os.path.dirname(REGRESSOR_PATH), exist_ok=True)

# Both models are refit every time this module is imported.
classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
joblib.dump(classifier, CLASSIFIER_PATH)

# Despite the name this is a logistic-regression classifier; the prediction
# endpoint reads its predict_proba output.
# NOTE(review): fitted on unscaled features with default settings — confirm
# that it converges on the real dataset.
regressor = LogisticRegression()
regressor.fit(X_train, y_train)
joblib.dump(regressor, REGRESSOR_PATH)

# The in-memory estimators are served directly; reloading the files that were
# just written would only yield equal copies of the same fitted objects.
|
|
|
|
|
class LoanRequest(BaseModel):
    """Request body accepted by the loan-risk prediction endpoint."""

    # Numeric inputs; the names match the entries of FEATURES.
    loan_amnt: float
    loan_int_rate: float
    person_age: int
    person_income: float

    # Required by the schema, but predict_loan_risk does not read it.
    person_home_ownership: str

    cb_person_cred_hist_length: int
|
|
|
|
|
@app.get("/loan_status_distribution")
def loan_status_distribution():
    """Return the percentage of rows with loan_status 1 and with loan_status 0."""
    if "loan_status" not in df.columns:
        raise HTTPException(status_code=400, detail="Missing 'loan_status' column")

    # Relative frequencies, scaled from fractions to percentages.
    shares = df["loan_status"].value_counts(normalize=True).mul(100)

    # A status value that never occurs in the data is reported as 0.
    return {
        "default_percentage": shares.get(1, 0),
        "non_default_percentage": shares.get(0, 0),
    }
|
|
|
@app.get("/payment_timeline_analysis")
def payment_timeline_analysis():
    """Return the mean loan amount for each loan_status value."""
    amounts_by_status = df.groupby("loan_status")["loan_amnt"]
    mean_amounts = amounts_by_status.mean()
    return {"average_loan_amount_by_status": mean_amounts.to_dict()}
|
|
|
@app.get("/principal_amount_patterns")
def principal_amount_patterns():
    """Return the mean loan_status per (age, income, home ownership) group.

    Each list entry carries the three grouping values plus ``default_rate``.
    """
    group_columns = ["person_age", "person_income", "person_home_ownership"]

    # Grouping on several columns yields tuple keys after ``to_dict()``, and a
    # dict with tuple keys cannot be encoded as a JSON object, so the response
    # could never be serialised. Flattening the group index into columns gives
    # one plain record per group instead.
    rates = (
        df.groupby(group_columns)["loan_status"]
        .mean()
        .reset_index(name="default_rate")
    )
    return {"demographic_default_rates": rates.to_dict(orient="records")}
|
|
|
@app.get("/credit_history_impact")
def credit_history_impact():
    """Return the mean loan_status for each credit-history length."""
    status_by_length = df.groupby("cb_person_cred_hist_length")["loan_status"]
    mean_status = status_by_length.mean()
    return {"credit_history_default_rates": mean_status.to_dict()}
|
|
|
@app.get("/customer_profile_analysis")
def customer_profile_analysis():
    """Return the mean loan_status per (age, income, home ownership) profile.

    Each list entry carries the three grouping values plus ``default_rate``.
    """
    profile_columns = ["person_age", "person_income", "person_home_ownership"]

    # A multi-column groupby produces tuple keys under ``to_dict()``; those
    # cannot become JSON object keys, so the response failed to serialise.
    # One flat record per profile is JSON-safe.
    rates = (
        df.groupby(profile_columns)["loan_status"]
        .mean()
        .reset_index(name="default_rate")
    )
    return {"customer_profile_default_rates": rates.to_dict(orient="records")}
|
|
|
@app.get("/loan_intent_analysis")
def loan_intent_analysis():
    """Return the mean loan_status for each loan_intent value."""
    status_by_intent = df.groupby("loan_intent")["loan_status"]
    mean_status = status_by_intent.mean()
    return {"loan_intent_default_rates": mean_status.to_dict()}
|
|
|
@app.get("/collection_effectiveness")
def collection_effectiveness():
    """Return the mean loan_status for each cb_person_default_on_file value."""
    status_by_flag = df.groupby("cb_person_default_on_file")["loan_status"]
    mean_status = status_by_flag.mean()
    return {"collection_success_rate": mean_status.to_dict()}
|
|
|
@app.get("/risk_score_development")
def risk_score_development():
    """Return the mean loan_status per distinct combination of model inputs.

    Each list entry carries the five grouping values plus ``default_rate``.
    """
    factor_columns = [
        "loan_amnt",
        "loan_int_rate",
        "person_age",
        "person_income",
        "cb_person_cred_hist_length",
    ]

    # ``to_dict()`` on a five-column groupby yields 5-tuples as keys, which
    # cannot be JSON object keys, so the response failed to serialise. Emit
    # one flat record per group instead.
    # NOTE(review): grouping on continuous columns likely gives close to one
    # group per row, so this payload may be very large — confirm intent.
    rates = (
        df.groupby(factor_columns)["loan_status"]
        .mean()
        .reset_index(name="default_rate")
    )
    return {"risk_scores": rates.to_dict(orient="records")}
|
|
|
@app.post("/predict_loan_risk")
def predict_loan_risk(request: LoanRequest):
    """Predict a risk class and a default probability for one loan request.

    ``predicted_risk_category`` comes from the random-forest classifier and
    ``default_probability`` from the logistic model's ``predict_proba``.
    """
    # Both estimators were fitted on a DataFrame with named columns. Passing a
    # bare list makes scikit-learn warn about missing feature names on every
    # call and leaves the column order implicit; building the row from
    # FEATURES keeps names and order aligned with training.
    row = {name: getattr(request, name) for name in FEATURES}
    input_data = pd.DataFrame([row], columns=FEATURES)

    risk_class = classifier.predict(input_data)[0]

    # Column 1 of predict_proba is the second entry of ``classes_``
    # (presumably label 1 — confirm against the dataset's label values).
    risk_prob = regressor.predict_proba(input_data)[0][1]

    # Cast to built-in types so the response is plain JSON.
    return {
        "predicted_risk_category": int(risk_class),
        "default_probability": float(risk_prob),
    }
|
|
|
|