# sample / main.py
# tharu22's picture
# one
# de6c861
import os

import joblib
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Initialize FastAPI app
app = FastAPI()

# Load dataset
DATASET_PATH = "credit_risk_dataset.csv"  # Update with actual dataset path
df = pd.read_csv(DATASET_PATH)

# Prepare data for ML training
FEATURES = ["loan_amnt", "loan_int_rate", "person_age", "person_income", "cb_person_cred_hist_length"]
# LogisticRegression.fit raises on NaN input, so rows with a missing value in
# any training column are excluded from the training frame. `df` itself is
# left untouched; the analytics endpoints below still see every row.
_training_df = df.dropna(subset=FEATURES + ["loan_status"])
X = _training_df[FEATURES]
y = _training_df["loan_status"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train models
# NOTE(review): both models are retrained every time this module is imported,
# i.e. on every server start. X_test / y_test are produced but never used in
# the visible code.
# joblib.dump does not create missing parent directories, so make sure the
# output directory exists before persisting anything.
os.makedirs("models", exist_ok=True)

classifier = RandomForestClassifier(n_estimators=100, random_state=42)
classifier.fit(X_train, y_train)
joblib.dump(classifier, "models/risk_classifier.pkl")

# Despite its name, `regressor` is a LogisticRegression classifier; it is used
# for its predict_proba output.
regressor = LogisticRegression()
regressor.fit(X_train, y_train)
joblib.dump(regressor, "models/past_due_regressor.pkl")

# Load ML models
# The files read back here are the ones written a few lines above by this
# same process. joblib.load unpickles its input, so never point these paths
# at files from an untrusted source.
classifier = joblib.load("models/risk_classifier.pkl")
regressor = joblib.load("models/past_due_regressor.pkl")
# Pydantic models for validation
class LoanRequest(BaseModel):
    """Request body for the POST /predict_loan_risk endpoint.

    Field names mirror column names of the credit-risk dataset. All six
    fields are required because none declares a default.
    """

    # Numeric inputs, named after the dataset columns the models are trained on.
    loan_amnt: float
    loan_int_rate: float
    person_age: int
    person_income: float
    # Validated as part of the request, but not among the model's training
    # features and not read by the prediction code.
    person_home_ownership: str
    cb_person_cred_hist_length: int
# API Endpoints
@app.get("/loan_status_distribution")
def loan_status_distribution():
    """Report what percentage of rows carry loan_status 1 and loan_status 0.

    Returns:
        A dict with ``default_percentage`` (share of rows where loan_status
        is 1) and ``non_default_percentage`` (share where it is 0). A value
        that never occurs is reported as 0.

    Raises:
        HTTPException: status 400 when the dataset has no ``loan_status``
            column.
    """
    if "loan_status" in df.columns:
        # normalize=True yields fractions; scale them to percentages.
        percentages = df["loan_status"].value_counts(normalize=True) * 100
        return {
            "default_percentage": percentages.get(1, 0),
            "non_default_percentage": percentages.get(0, 0),
        }
    raise HTTPException(status_code=400, detail="Missing 'loan_status' column")
@app.get("/payment_timeline_analysis")
def payment_timeline_analysis():
    """Return the mean ``loan_amnt`` for each distinct ``loan_status`` value.

    Returns:
        ``{"average_loan_amount_by_status": {status: mean_amount, ...}}``.
    """
    mean_amount_by_status = df.groupby("loan_status")["loan_amnt"].mean()
    return {"average_loan_amount_by_status": mean_amount_by_status.to_dict()}
@app.get("/principal_amount_patterns")
def principal_amount_patterns():
    """Return the mean ``loan_status`` per (age, income, home ownership) group.

    The groups are returned as a list of records rather than a dict keyed by
    the group. A multi-column groupby has tuple keys after ``.to_dict()``,
    and those cannot be JSON object keys, so the previous dict form made
    response encoding fail on every call.

    Returns:
        ``{"demographic_default_rates": [record, ...]}`` where each record
        holds ``person_age``, ``person_income``, ``person_home_ownership``
        and ``default_rate`` (the mean of ``loan_status`` within the group).
    """
    group_columns = ["person_age", "person_income", "person_home_ownership"]
    default_rates = (
        df.groupby(group_columns)["loan_status"]
        .mean()
        # Turn the group keys back into ordinary columns so each group
        # becomes one flat, JSON-serializable record.
        .reset_index(name="default_rate")
    )
    return {"demographic_default_rates": default_rates.to_dict(orient="records")}
@app.get("/credit_history_impact")
def credit_history_impact():
    """Return the mean ``loan_status`` for each ``cb_person_cred_hist_length``.

    Returns:
        ``{"credit_history_default_rates": {length: mean_status, ...}}``.
    """
    rate_by_history_length = df.groupby("cb_person_cred_hist_length")["loan_status"].mean()
    return {"credit_history_default_rates": rate_by_history_length.to_dict()}
@app.get("/customer_profile_analysis")
def customer_profile_analysis():
    """Return the mean ``loan_status`` per (age, income, home ownership) profile.

    The profiles are returned as a list of records rather than a dict keyed
    by the profile. A multi-column groupby has tuple keys after
    ``.to_dict()``, and those cannot be JSON object keys, so the previous
    dict form made response encoding fail on every call.

    Returns:
        ``{"customer_profile_default_rates": [record, ...]}`` where each
        record holds ``person_age``, ``person_income``,
        ``person_home_ownership`` and ``default_rate`` (the mean of
        ``loan_status`` within the profile).
    """
    profile_columns = ["person_age", "person_income", "person_home_ownership"]
    default_rates = (
        df.groupby(profile_columns)["loan_status"]
        .mean()
        # Turn the group keys back into ordinary columns so each profile
        # becomes one flat, JSON-serializable record.
        .reset_index(name="default_rate")
    )
    return {"customer_profile_default_rates": default_rates.to_dict(orient="records")}
@app.get("/loan_intent_analysis")
def loan_intent_analysis():
    """Return the mean ``loan_status`` for each ``loan_intent`` value.

    Returns:
        ``{"loan_intent_default_rates": {intent: mean_status, ...}}``.
    """
    rate_by_intent = df.groupby("loan_intent")["loan_status"].mean()
    return {"loan_intent_default_rates": rate_by_intent.to_dict()}
@app.get("/collection_effectiveness")
def collection_effectiveness():
    """Return the mean ``loan_status`` per ``cb_person_default_on_file`` value.

    Returns:
        ``{"collection_success_rate": {flag: mean_status, ...}}``.
    """
    # NOTE(review): the response key says "success rate", but the number is
    # the mean of loan_status within each group - confirm the intended meaning.
    rate_by_prior_default = df.groupby("cb_person_default_on_file")["loan_status"].mean()
    return {"collection_success_rate": rate_by_prior_default.to_dict()}
@app.get("/risk_score_development")
def risk_score_development():
    """Return the mean ``loan_status`` for each distinct combination of risk factors.

    The combinations are returned as a list of records rather than a dict
    keyed by the combination. A multi-column groupby has tuple keys after
    ``.to_dict()``, and those cannot be JSON object keys, so the previous
    dict form made response encoding fail on every call.

    Returns:
        ``{"risk_scores": [record, ...]}`` where each record holds the five
        grouping columns plus ``default_rate`` (the mean of ``loan_status``
        within the group).
    """
    factor_columns = [
        "loan_amnt",
        "loan_int_rate",
        "person_age",
        "person_income",
        "cb_person_cred_hist_length",
    ]
    # NOTE(review): grouping on five numeric columns may produce close to one
    # group per row, which would make this response very large - confirm
    # whether binning or pagination is wanted.
    default_rates = (
        df.groupby(factor_columns)["loan_status"]
        .mean()
        # Turn the group keys back into ordinary columns so each combination
        # becomes one flat, JSON-serializable record.
        .reset_index(name="default_rate")
    )
    return {"risk_scores": default_rates.to_dict(orient="records")}
@app.post("/predict_loan_risk")
def predict_loan_risk(request: LoanRequest):
    """Predict a risk class and a default probability for one loan request.

    Args:
        request: Validated loan attributes. Only the fields named in
            ``FEATURES`` are passed to the models.

    Returns:
        A dict with ``predicted_risk_category`` (the class predicted by
        ``classifier``, as an int) and ``default_probability`` (the second
        column of ``regressor.predict_proba``, as a float).
    """
    # Both models were fitted on a DataFrame with named columns. Passing a
    # one-row DataFrame keyed by FEATURES keeps the column order tied to
    # training and avoids scikit-learn's missing-feature-names warning on
    # every request.
    input_data = pd.DataFrame(
        [[getattr(request, name) for name in FEATURES]],
        columns=FEATURES,
    )
    risk_class = classifier.predict(input_data)[0]
    # NOTE(review): the category and the probability come from two different
    # models, so they can disagree. Index 1 is presumably the loan_status == 1
    # class - confirm against regressor.classes_.
    risk_prob = regressor.predict_proba(input_data)[0][1]
    return {"predicted_risk_category": int(risk_class), "default_probability": float(risk_prob)}