suvradeepp commited on
Commit
cea1951
·
verified ·
1 Parent(s): fd82ad5

Upload 89 files

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full list.
Files changed (50) hide show
  1. .gitattributes +14 -0
  2. .gitignore +207 -0
  3. README.md +352 -15
  4. app.py +431 -0
  5. explainable-credit-risk-modeling-with-schduling.ipynb +0 -0
  6. kaggle_output/.virtual_documents/__notebook_source__.ipynb +1196 -0
  7. kaggle_output/drift_simulation.png +3 -0
  8. kaggle_output/eda_overview.png +3 -0
  9. kaggle_output/explainable-credit-risk-modeling-with-alternative.log +15 -0
  10. kaggle_output/models/feature_cols.json +1 -0
  11. kaggle_output/models/lgbm_fold_1.txt +0 -0
  12. kaggle_output/models/lgbm_fold_2.txt +0 -0
  13. kaggle_output/models/lgbm_fold_3.txt +3 -0
  14. kaggle_output/models/lgbm_fold_4.txt +3 -0
  15. kaggle_output/models/lgbm_fold_5.txt +3 -0
  16. kaggle_output/models/pca.pkl +3 -0
  17. kaggle_output/models/scaler.pkl +3 -0
  18. kaggle_output/models/xgb_fold_1.json +0 -0
  19. kaggle_output/models/xgb_fold_2.json +0 -0
  20. kaggle_output/models/xgb_fold_3.json +0 -0
  21. kaggle_output/models/xgb_fold_4.json +0 -0
  22. kaggle_output/models/xgb_fold_5.json +0 -0
  23. kaggle_output/river_drift_detection.png +3 -0
  24. kaggle_output/shap_bar.png +3 -0
  25. kaggle_output/shap_beeswarm.png +3 -0
  26. kaggle_output/shap_dependence.png +3 -0
  27. kaggle_output/shap_waterfall.png +3 -0
  28. kaggle_output/submission_ensemble.csv +0 -0
  29. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/config.yaml +80 -0
  30. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/media/table/feature_importance_6_9280c5e00d174ed85360.table.json +1 -0
  31. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/output.log +12 -0
  32. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/requirements.txt +974 -0
  33. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/wandb-metadata.json +42 -0
  34. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/wandb-summary.json +1 -0
  35. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/logs/debug-internal.log +12 -0
  36. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/logs/debug.log +29 -0
  37. kaggle_output/wandb/run-20260331_065527-jxzjz5r3/run-jxzjz5r3.wandb +0 -0
  38. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/config.yaml +122 -0
  39. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/output.log +65 -0
  40. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/requirements.txt +974 -0
  41. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/wandb-metadata.json +42 -0
  42. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/wandb-summary.json +1 -0
  43. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/logs/debug-internal.log +12 -0
  44. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/logs/debug.log +26 -0
  45. kaggle_output/wandb/run-20260331_094728-v7xaa9j7/run-v7xaa9j7.wandb +0 -0
  46. kaggle_output/wandb/run-20260331_095935-cu87492i/files/config.yaml +85 -0
  47. kaggle_output/wandb/run-20260331_095935-cu87492i/files/output.log +3 -0
  48. kaggle_output/wandb/run-20260331_095935-cu87492i/files/requirements.txt +974 -0
  49. kaggle_output/wandb/run-20260331_095935-cu87492i/files/wandb-metadata.json +42 -0
  50. kaggle_output/wandb/run-20260331_095935-cu87492i/files/wandb-summary.json +1 -0
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kaggle_output/drift_simulation.png filter=lfs diff=lfs merge=lfs -text
37
+ kaggle_output/eda_overview.png filter=lfs diff=lfs merge=lfs -text
38
+ kaggle_output/models/lgbm_fold_3.txt filter=lfs diff=lfs merge=lfs -text
39
+ kaggle_output/models/lgbm_fold_4.txt filter=lfs diff=lfs merge=lfs -text
40
+ kaggle_output/models/lgbm_fold_5.txt filter=lfs diff=lfs merge=lfs -text
41
+ kaggle_output/river_drift_detection.png filter=lfs diff=lfs merge=lfs -text
42
+ kaggle_output/shap_bar.png filter=lfs diff=lfs merge=lfs -text
43
+ kaggle_output/shap_beeswarm.png filter=lfs diff=lfs merge=lfs -text
44
+ kaggle_output/shap_dependence.png filter=lfs diff=lfs merge=lfs -text
45
+ kaggle_output/shap_waterfall.png filter=lfs diff=lfs merge=lfs -text
46
+ models/lgbm_fold_3.txt filter=lfs diff=lfs merge=lfs -text
47
+ models/lgbm_fold_4.txt filter=lfs diff=lfs merge=lfs -text
48
+ models/lgbm_fold_5.txt filter=lfs diff=lfs merge=lfs -text
49
+ utils/2.jpeg filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[codz]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py.cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+ #poetry.toml
110
+
111
+ # pdm
112
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
113
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
114
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
115
+ #pdm.lock
116
+ #pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # pixi
121
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
122
+ #pixi.lock
123
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
124
+ # in the .venv directory. It is recommended not to include this directory in version control.
125
+ .pixi
126
+
127
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
128
+ __pypackages__/
129
+
130
+ # Celery stuff
131
+ celerybeat-schedule
132
+ celerybeat.pid
133
+
134
+ # SageMath parsed files
135
+ *.sage.py
136
+
137
+ # Environments
138
+ .env
139
+ .envrc
140
+ .venv
141
+ env/
142
+ venv/
143
+ ENV/
144
+ env.bak/
145
+ venv.bak/
146
+
147
+ # Spyder project settings
148
+ .spyderproject
149
+ .spyproject
150
+
151
+ # Rope project settings
152
+ .ropeproject
153
+
154
+ # mkdocs documentation
155
+ /site
156
+
157
+ # mypy
158
+ .mypy_cache/
159
+ .dmypy.json
160
+ dmypy.json
161
+
162
+ # Pyre type checker
163
+ .pyre/
164
+
165
+ # pytype static type analyzer
166
+ .pytype/
167
+
168
+ # Cython debug symbols
169
+ cython_debug/
170
+
171
+ # PyCharm
172
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
173
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
174
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
175
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
176
+ #.idea/
177
+
178
+ # Abstra
179
+ # Abstra is an AI-powered process automation framework.
180
+ # Ignore directories containing user credentials, local state, and settings.
181
+ # Learn more at https://abstra.io/docs
182
+ .abstra/
183
+
184
+ # Visual Studio Code
185
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
186
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
187
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
188
+ # you could uncomment the following to ignore the entire vscode folder
189
+ # .vscode/
190
+
191
+ # Ruff stuff:
192
+ .ruff_cache/
193
+
194
+ # PyPI configuration file
195
+ .pypirc
196
+
197
+ # Cursor
198
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
199
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
200
+ # refer to https://docs.cursor.com/context/ignore-files
201
+ .cursorignore
202
+ .cursorindexingignore
203
+
204
+ # Marimo
205
+ marimo/_static/
206
+ marimo/_lsp/
207
+ __marimo__/
README.md CHANGED
@@ -1,20 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
- title: Explainable Credit Risk Modeling With Schduling
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- license: mit
 
 
 
 
 
 
 
 
 
 
13
  ---
14
 
15
- # Welcome to Streamlit!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
20
- forums](https://discuss.streamlit.io).
 
 
 
1
+ <p align="center">
2
+ <h1 align="center">💳 Credit Invisibility Solver</h1>
3
+ <p align="center">
4
+ <b>Explainable Credit Risk Modeling with Alternative Data, NLP Embeddings & Concept Drift Detection</b>
5
+ </p>
6
+ <p align="center">
7
+ <img src="https://img.shields.io/badge/Model-LightGBM%20%2B%20XGBoost-brightgreen" />
8
+ <img src="https://img.shields.io/badge/NLP-Sentence--BERT-blue" />
9
+ <img src="https://img.shields.io/badge/Explainability-SHAP-orange" />
10
+ <img src="https://img.shields.io/badge/Drift-River%20ADWIN-red" />
11
+ <img src="https://img.shields.io/badge/Tracking-W%26B-yellow" />
12
+ <img src="https://img.shields.io/badge/Deploy-Streamlit-ff4b4b" />
13
+ </p>
14
+ </p>
15
+
16
+ ---
17
+
18
+ ## Problem Statement
19
+
20
+ **1.7 billion adults worldwide are credit-invisible** — they have no formal credit history, locking them out of loans, insurance, and financial services. Traditional credit scoring relies on bureau data that simply doesn't exist for these populations.
21
+
22
+ This project builds an **end-to-end ML pipeline** that:
23
+ 1. **Engineers 200+ features** from 7 relational tables (Home Credit Default Risk dataset)
24
+ 2. **Fuses tabular + NLP signals** using Sentence-BERT embeddings of synthesized financial narratives
25
+ 3. **Trains an optimized LightGBM + XGBoost ensemble** with Optuna hyperparameter tuning
26
+ 4. **Explains every prediction** with SHAP (beeswarm, waterfall, dependence plots)
27
+ 5. **Detects concept drift** in real-time using River's ADWIN detector
28
+ 6. **Deploys as an interactive Streamlit dashboard** for instant credit scoring
29
+
30
+ ---
31
+
32
+ ## 🏗️ Architecture
33
+
34
+ ```
35
+ ┌─────────────────────────────────────────────────────────────────────┐
36
+ │ 7 Raw Tables (Home Credit) │
37
+ │ application_train/test │ bureau │ prev_app │ installments │ ... │
38
+ └──────────────┬──────────────────────────────────────────────────────┘
39
+
40
+ ┌──────────▼──────────┐ ┌──────────────────────┐
41
+ │ Feature Engineering │ │ NLP Pipeline │
42
+ │ • Bureau aggregates │ │ • Financial narrative │
43
+ │ • Prev app signals │ │ synthesis │
44
+ │ • Installment │ │ • Sentence-BERT │
45
+ │ behaviour │ │ encoding │
46
+ │ • POS Cash / CC │ │ • PCA → 32 dims │
47
+ │ • Domain ratios │ └──────────┬─────────────┘
48
+ └──────────┬──────────┘ │
49
+ │ ┌────────────────┘
50
+ ▼ ▼
51
+ ┌──────────────────────────┐
52
+ │ Merged Feature Matrix │
53
+ │ 207 features total │
54
+ │ (175 tabular + 32 NLP) │
55
+ └──────────┬───────────────┘
56
+
57
+ ┌──────────▼──────────┐
58
+ │ Optuna HPO │
59
+ │ 80 trials each │
60
+ │ TPE + MedianPruner │
61
+ └──────────┬───────────┘
62
+
63
+ ┌──────────▼──────────────────────┐
64
+ │ Ensemble: 0.6×LightGBM + 0.4×XGBoost │
65
+ │ 5-Fold Stratified CV │
66
+ │ + Logistic Regression Blending │
67
+ └──────────┬──────────────────────────────┘
68
+
69
+ ┌──────────▼──────────┐ ┌──────────────────────┐
70
+ │ SHAP Explainability │ │ River ADWIN Drift │
71
+ │ • TreeExplainer │ │ • Online learning │
72
+ │ • Beeswarm / Bar │ │ • Auto-retrain │
73
+ │ • Waterfall │ │ • Drift simulation │
74
+ └──────────────────────┘ └──────────────────────┘
75
+
76
+ ┌──────────▼──────────┐
77
+ │ Streamlit Dashboard │
78
+ │ • Live scoring │
79
+ │ • SHAP per applicant │
80
+ │ • Drift sensitivity │
81
+ └───────────────────────┘
82
+ ```
83
+
84
+ ---
85
+
86
+ ## Streamlit Dashboard
87
+
88
+ ### Score Breakdown — Gauge Chart + Risk Factor Radar
89
+
90
+ ![Score Breakdown](utils/1.jpeg)
91
+
92
+ ### SHAP Explainability — Top 15 Feature Contributions
93
+
94
+ ![SHAP Explainability](utils/2.jpeg)
95
+
96
+ ### Drift Simulation — Income Shock Sensitivity
97
+
98
+ ![Drift Simulation](utils/3.jpeg)
99
+
100
+ ### Feature Profile — Applicant Summary Table
101
+
102
+ ![Feature Profile](utils/4.jpeg)
103
+
104
  ---
105
+
106
+ ## Project Structure
107
+
108
+ ```
109
+ ├── notebook.ipynb ← Kaggle training notebook (18 cells)
110
+ ├── streamlit_app.py ← Interactive deployment dashboard
111
+ ├── src/
112
+ │ ├── feature_engineering.py ← 7-table feature pipeline
113
+ │ ├── drift_detector.py ← ADWIN/KSWIN drift detection + River online learner
114
+ │ └── nlp_features.py ← Sentence-BERT embedding pipeline
115
+ ├── models/ ← Saved model artifacts
116
+ │ ├── lgbm_fold_{1-5}.txt ← 5 LightGBM fold models
117
+ │ ├── xgb_fold_{1-5}.json ← 5 XGBoost fold models
118
+ │ ├── pca.pkl ← Fitted PCA for NLP embeddings
119
+ │ ├── scaler.pkl ← Fitted StandardScaler
120
+ │ └── feature_cols.json ← 207 feature column names
121
+ ├── kaggle_output/ ← Full Kaggle run artifacts (plots, submission, logs)
122
+ ├── requirements.txt
123
+ └── README.md
124
+ ```
125
+
126
  ---
127
 
128
+ ## EDA & Training Results
129
+
130
+ ### Exploratory Data Analysis
131
+
132
+ ![EDA Overview](kaggle_output/eda_overview.png)
133
+
134
+ **Key findings:**
135
+ - **91.9% non-default vs 8.1% default** — severe class imbalance (11.4:1 ratio)
136
+ - Income distributions overlap heavily between defaulters and non-defaulters
137
+ - Age is weakly predictive — younger applicants default slightly more
138
+ - Occupation type shows strong signal (Laborers, Drivers have highest default rates)
139
+ - 40%+ missing values in housing and employment-related columns
140
+
141
+ ---
142
+
143
+ ### SHAP Feature Importance
144
+
145
+ #### Global Feature Importance (Mean |SHAP|)
146
+
147
+ ![SHAP Bar Plot](kaggle_output/shap_bar.png)
148
+
149
+ **Top predictive features:** `EXT_SOURCE_MEAN`, `EXT_SOURCE_2`, `DAYS_BIRTH` (age), `CREDIT_INCOME_RATIO`, and `DAYS_EMPLOYED` dominate the model's decisions.
150
+
151
+ #### SHAP Beeswarm — Per-Feature Impact Distribution
152
+
153
+ ![SHAP Beeswarm](kaggle_output/shap_beeswarm.png)
154
+
155
+ Each dot represents one applicant. Red = high feature value, Blue = low. Features like `EXT_SOURCE_MEAN` show a clear trend: **higher external scores → lower default risk**.
156
+
157
+ #### SHAP Dependence Plots — Top 3 Features
158
+
159
+ ![SHAP Dependence](kaggle_output/shap_dependence.png)
160
+
161
+ Non-linear relationships revealed by SHAP dependence: external scores have diminishing returns beyond 0.7, and credit-to-income ratio inflects sharply above 3x.
162
+
163
+ #### SHAP Waterfall — Highest-Risk Applicant
164
+
165
+ ![SHAP Waterfall](kaggle_output/shap_waterfall.png)
166
+
167
+ Per-applicant explanation showing exactly how each feature pushed the prediction above/below the base rate. This is the core of the "explainability" promise.
168
+
169
+ ---
170
+
171
+ ### Concept Drift Simulation
172
+
173
+ #### Batch Drift Scenarios
174
+
175
+ ![Drift Simulation](kaggle_output/drift_simulation.png)
176
+
177
+ Simulated economic shocks (income reduction, mass job loss) show model AUC degradation. Under a **60% income shock**, AUC drops significantly, validating the need for drift detection.
178
+
179
+ #### River ADWIN Online Drift Detection
180
+
181
+ ![River Drift Detection](kaggle_output/river_drift_detection.png)
182
+
183
+ The ADWIN detector correctly identifies the injected concept drift at sample ~185k. The adaptive Hoeffding Tree auto-retrains on detection, showing the cumulative drift event count.
184
+
185
+ ---
186
+
187
+ ## Feature Engineering Pipeline
188
+
189
+ ### 7 Source Tables → 207 Features
190
 
191
+ | Source Table | Features | Key Signals |
192
+ |---|---|---|
193
+ | **Application** | Domain ratios, external scores, age/employment, document flags | `CREDIT_INCOME_RATIO`, `EXT_SOURCE_MEAN`, `AGE_YEARS` |
194
+ | **Bureau** | Credit history aggregates, DPD rates, utilization | `BUREAU_DEBT_CREDIT_RATIO_MAX`, `BUREAU_ACTIVE_COUNT` |
195
+ | **Previous Apps** | Approval/refusal rates, application patterns | `PREV_APPROVED_RATE`, `PREV_APP_CREDIT_RATIO_MEAN` |
196
+ | **Installments** | Payment behaviour, late/short payment rates | `INST_LATE_PAYMENT_RATE`, `INST_PAYMENT_DIFF_MEAN` |
197
+ | **POS Cash** | Point-of-sale DPD patterns | `POS_DPD_RATE`, `POS_SK_DPD_MAX` |
198
+ | **Credit Card** | Utilization rates, drawing behaviour | `CC_UTIL_RATE_MEAN`, `CC_DRAWING_RATE_MEAN` |
199
+ | **NLP Embeddings** | Sentence-BERT + PCA (32 dims) | `NLP_EMB_0` through `NLP_EMB_31` |
200
+
201
+ ### NLP Feature Pipeline
202
+
203
+ Financial narratives are **synthesized from tabular signals** (in production, these would come from real user survey text, app usage data, or financial literacy assessments):
204
+
205
+ ```
206
+ "Applicant aged 35 years with annual income of 250000 currency units.
207
+ Requesting credit of 500000 for personal needs. Employed for 5.0 years.
208
+ Client demonstrates moderate financial awareness with occasional late payments.
209
+ External credit assessment score: 0.55. Owns property which serves as collateral."
210
+ ```
211
+
212
+ These are encoded with **Sentence-BERT (all-MiniLM-L6-v2)** and reduced to 32 dimensions via PCA, capturing semantic credit signals.
213
+
214
+ ---
215
+
216
+ ## Model Training
217
+
218
+ ### Ensemble Strategy
219
+
220
+ | Component | Method | OOF AUC |
221
+ |---|---|---|
222
+ | **LightGBM** | 5-fold CV, Optuna-tuned (80 trials) | ~0.78 |
223
+ | **XGBoost** | 5-fold CV, Optuna-tuned (80 trials) | ~0.77 |
224
+ | **Ensemble** | 0.6×LGBM + 0.4×XGB weighted blend | ~0.79 |
225
+
226
+ ### Blending with Logistic Regression
227
+
228
+ The ensemble uses a **fixed 60/40 weighted average** of LightGBM and XGBoost OOF predictions. In an extended pipeline, a **Logistic Regression meta-learner** can be stacked on top of the base model predictions:
229
+
230
+ ```python
231
+ from sklearn.linear_model import LogisticRegression
232
+
233
+ # Stack OOF predictions as meta-features
234
+ meta_X = np.column_stack([oof_lgbm, oof_xgb])
235
+ meta_lr = LogisticRegression(C=1.0)
236
+ meta_lr.fit(meta_X, y)
237
+
238
+ # Final blend = LR(lgbm_pred, xgb_pred)
239
+ test_blend = meta_lr.predict_proba(np.column_stack([test_lgbm, test_xgb]))[:, 1]
240
+ ```
241
+
242
+ This learns the optimal blending weights from data rather than using fixed 60/40.
243
+
244
+ ### Optuna Hyperparameter Optimization
245
+
246
+ - **Sampler:** TPE (Tree-Structured Parzen Estimator)
247
+ - **Pruner:** MedianPruner with 10 warmup steps — kills bad trials early
248
+ - **Search space:** `num_leaves`, `learning_rate`, `feature_fraction`, `bagging_fraction`, `reg_alpha/lambda`, `max_depth`, `min_gain_to_split`
249
+
250
+ ---
251
+
252
+ ## Concept Drift Detection
253
+
254
+ ### Why Drift Matters
255
+
256
+ Credit models degrade over time as economic conditions change. A model trained on pre-pandemic data won't perform well during a recession. This project implements:
257
+
258
+ 1. **Batch drift simulation** — apply synthetic income shocks (30-70% reduction) and measure AUC degradation
259
+ 2. **Online drift detection** — River's ADWIN detector monitors the prediction error stream in real-time
260
+ 3. **Auto-retrain** — when ADWIN fires, the Hoeffding Adaptive Tree resets with a faster learning rate
261
+
262
+ ### River Pipeline
263
+
264
+ ```python
265
+ # Online pipeline: StandardScaler → Hoeffding Adaptive Tree
266
+ pipeline = StandardScaler() | HoeffdingAdaptiveTreeClassifier(grace_period=200)
267
+
268
+ # ADWIN monitors error stream
269
+ adwin = ADWIN(delta=0.002) # lower delta = more sensitive
270
+
271
+ # On drift detection → rebuild pipeline with faster adaptation
272
+ if adwin.drift_detected:
273
+ pipeline = StandardScaler() | HoeffdingAdaptiveTreeClassifier(grace_period=50)
274
+ ```
275
+
276
+ ---
277
+
278
+ ## Quick Start
279
+
280
+ ### Prerequisites
281
+
282
+ - Python 3.10+
283
+ - [uv](https://docs.astral.sh/uv/) (recommended) or pip
284
+
285
+ ### Setup
286
+
287
+ ```bash
288
+ # Clone
289
+ git clone https://github.com/suvraadeep/Explainable-Credit-Risk-Modeling-with-Schduling.git
290
+ cd Explainable-Credit-Risk-Modeling-with-Schduling
291
+
292
+ # Create venv and install deps
293
+ uv venv .venv
294
+ uv pip install --python .venv/Scripts/python.exe -r requirements.txt
295
+
296
+ # Or with pip
297
+ python -m venv .venv
298
+ .venv/Scripts/activate # Windows
299
+ pip install -r requirements.txt
300
+ ```
301
+
302
+ ### Run the Streamlit Dashboard
303
+
304
+ ```bash
305
+ # Windows
306
+ .venv\Scripts\streamlit.exe run app.py
307
+
308
+ # Linux/Mac
309
+ .venv/bin/streamlit run app.py
310
+ ```
311
+
312
+ Open **http://localhost:8501** in your browser.
313
+
314
+ ### Kaggle Notebook
315
+
316
+ The full training pipeline runs on Kaggle with the [Home Credit Default Risk](https://www.kaggle.com/c/home-credit-default-risk) dataset. Upload the notebook and run all 18 cells to reproduce:
317
+ - Feature engineering across 7 tables
318
+ - Sentence-BERT NLP embeddings
319
+ - Optuna HPO for LightGBM and XGBoost
320
+ - 5-fold ensemble training
321
+ - SHAP explainability suite
322
+ - River online drift detection
323
+ - W&B experiment logging
324
+
325
+ ---
326
+
327
+ ## W&B Experiment Tracking
328
+
329
+ All experiments are tracked with [Weights & Biases](https://wandb.ai/):
330
+
331
+ | Run | Metrics Logged |
332
+ |---|---|
333
+ | `lgbm-baseline` | Per-fold AUC, feature importance table |
334
+ | `ensemble-lgbm-xgb` | Per-fold LightGBM/XGBoost/Ensemble AUC |
335
+ | `concept-drift-simulation` | AUC under 5 economic shock scenarios |
336
+ | `final-summary` | Consolidated metrics, artifact upload |
337
+
338
+ Set your API key:
339
+ ```bash
340
+ # Kaggle → Secrets → WANDB_API_KEY
341
+ # Or in notebook:
342
+ import wandb
343
+ wandb.login()
344
+ ```
345
+
346
+ ---
347
+
348
+ ## License
349
+
350
+ This project is licensed under the MIT License — see the [LICENSE](LICENSE) file for details.
351
+
352
+ ---
353
 
354
+ <p align="center">
355
+ <b>Built for the 1.7B credit-invisible 🌍</b><br>
356
+ <sub>LightGBM + XGBoost + Sentence-BERT + SHAP + River (ADWIN) + W&B</sub>
357
+ </p>
app.py ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py
2
+ """
3
+ Credit Invisibility Solver — Streamlit App
4
+ Run: streamlit run streamlit_app.py
5
+ """
6
+
7
+ import streamlit as st
8
+ import numpy as np
9
+ import pandas as pd
10
+ import shap
11
+ import lightgbm as lgb
12
+ import matplotlib.pyplot as plt
13
+ import matplotlib
14
+ matplotlib.use("Agg")
15
+ import json
16
+ import joblib
17
+ import os
18
+ import plotly.graph_objects as go
19
+ from sentence_transformers import SentenceTransformer
20
+ from sklearn.decomposition import PCA
21
+
22
# ─── Page config ──────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="Credit Invisibility Solver",
    page_icon="💳",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ─── Custom CSS ───────────────────────────────────────────────────────────────
# Styling for the header banner, metric cards, and risk-band callout boxes
# rendered via st.markdown(..., unsafe_allow_html=True) elsewhere in the app.
st.markdown("""
<style>
    .main-header { font-size: 2.4rem; font-weight: 800; color: #1565C0; text-align: center; padding: 1rem 0; }
    .metric-card { background: linear-gradient(135deg, #1565C0, #42A5F5); border-radius: 12px;
                   padding: 1.2rem; color: white; text-align: center; }
    .risk-high { background: #FFEBEE; border-left: 5px solid #F44336; padding: 1rem; border-radius: 8px; }
    .risk-medium { background: #FFF8E1; border-left: 5px solid #FF9800; padding: 1rem; border-radius: 8px; }
    .risk-low { background: #E8F5E9; border-left: 5px solid #4CAF50; padding: 1rem; border-radius: 8px; }
    .sidebar-section { font-size: 0.9rem; color: #666; margin-bottom: 0.4rem; font-weight: 600; }
</style>
""", unsafe_allow_html=True)

# ─── Load artifacts ───────────────────────────────────────────────────────────
# Directory containing the saved fold models and preprocessing artifacts.
MODEL_DIR = "./models"
45
+
46
@st.cache_resource
def load_models():
    """Load every available LightGBM fold booster from MODEL_DIR.

    Missing fold files are skipped silently, so the app degrades
    gracefully when only some of the five models were uploaded.
    Cached by Streamlit so the boosters load once per process.
    """
    boosters = []
    for fold in range(1, 6):
        model_path = f"{MODEL_DIR}/lgbm_fold_{fold}.txt"
        if not os.path.exists(model_path):
            continue
        boosters.append(lgb.Booster(model_file=model_path))
    return boosters
55
+
56
@st.cache_resource
def load_artifacts():
    """Load the fitted PCA, StandardScaler, and feature-column list.

    Returns (pca, scaler, feature_cols); pca/scaler are None when their
    pickle is absent, and feature_cols falls back to an empty list.
    """
    def _load_if_present(path):
        # joblib-load a pickle only when the file actually exists.
        return joblib.load(path) if os.path.exists(path) else None

    pca = _load_if_present(f"{MODEL_DIR}/pca.pkl")
    scaler = _load_if_present(f"{MODEL_DIR}/scaler.pkl")

    feature_cols = []
    fc_path = f"{MODEL_DIR}/feature_cols.json"
    if os.path.exists(fc_path):
        with open(fc_path) as fh:
            feature_cols = json.load(fh)
    return pca, scaler, feature_cols
67
+
68
@st.cache_resource
def load_sbert():
    """Return the Sentence-BERT encoder, or None when it cannot be loaded.

    A None return switches the feature builder into demo mode
    (zero-filled NLP embeddings) instead of crashing the app.
    """
    try:
        encoder = SentenceTransformer("all-MiniLM-L6-v2")
    except Exception:
        # Offline environment or missing weights — fall back to demo mode.
        return None
    return encoder
74
+
75
+ # ─── Helper functions ─────────────────────────────────────────────────────────
76
def build_single_applicant_features(inputs: dict, pca, sbert) -> pd.DataFrame:
    """Transform raw user inputs into a single-row, model-ready feature frame.

    Parameters
    ----------
    inputs : dict of raw sidebar values (income, credit_amount, age, ...).
    pca : fitted PCA for the NLP embeddings, or None.
    sbert : SentenceTransformer encoder, or None for demo mode.

    Returns a one-row DataFrame of tabular features plus NLP_EMB_* columns.
    """
    income = inputs["income"]
    credit = inputs["credit_amount"]
    age = inputs["age"]
    emp_yrs = inputs["employment_years"]
    ext1 = inputs["ext_score_1"]
    ext2 = inputs["ext_score_2"]
    ext3 = inputs["ext_score_3"]
    ext_mean = np.mean([ext1, ext2, ext3])

    # Describe financial literacy from the mean external score.
    if ext_mean > 0.6:
        literacy = "strong financial planning habits"
    elif ext_mean > 0.4:
        literacy = "moderate financial awareness"
    else:
        literacy = "limited financial experience"

    # Synthesize the narrative text fed to the Sentence-BERT encoder.
    property_note = "Owns property." if inputs["owns_realty"] else "No property."
    children_note = "Has dependents." if inputs["has_children"] else "No children."
    text = (
        f"Applicant aged {age:.0f} years with annual income of {income:.0f}. "
        f"Requesting credit of {credit:.0f}. Employed for {emp_yrs:.1f} years. "
        f"Client demonstrates {literacy}. External score: {ext_mean:.2f}. "
        f"{property_note} {children_note}"
    )

    # NLP embedding → optional PCA reduction; zeros when no encoder is loaded.
    if sbert is not None:
        emb = sbert.encode([text], normalize_embeddings=True)
        if pca is not None:
            emb = pca.transform(emb)
        vec = emb[0]
        nlp_dict = {f"NLP_EMB_{i}": vec[i] for i in range(emb.shape[1])}
    else:
        # Demo mode — dimensionality mirrors the fitted PCA when available.
        n_dims = pca.n_components_ if pca is not None else 32
        nlp_dict = {f"NLP_EMB_{i}": 0.0 for i in range(n_dims)}

    # Tabular features — key order matters (it fixes the column order of the
    # returned frame), so keep it identical to training.
    tab_dict = {
        "AMT_INCOME_TOTAL": income,
        "AMT_CREDIT": credit,
        "AMT_ANNUITY": inputs["annuity"],
        "AMT_GOODS_PRICE": credit * 0.9,
        "DAYS_BIRTH": -age * 365,
        "DAYS_EMPLOYED": -emp_yrs * 365,
        "EXT_SOURCE_1": ext1,
        "EXT_SOURCE_2": ext2,
        "EXT_SOURCE_3": ext3,
        "EXT_SOURCE_MEAN": ext_mean,
        "EXT_SOURCE_MIN": np.min([ext1, ext2, ext3]),
        "EXT_SOURCE_PROD": ext1 * ext2 * ext3,
        "EXT_SOURCE_STD": np.std([ext1, ext2, ext3]),
        "EXT1_EXT2_INTERACTION": ext1 * ext2,
        "EXT2_EXT3_INTERACTION": ext2 * ext3,
        "CREDIT_INCOME_RATIO": credit / (income + 1),
        "ANNUITY_INCOME_RATIO": inputs["annuity"] / (income + 1),
        "CREDIT_TERM": inputs["annuity"] / (credit + 1),
        "AGE_YEARS": age,
        "EMPLOYMENT_YEARS": emp_yrs,
        "EMPLOYED_RATIO": emp_yrs / (age + 1),
        "INCOME_PER_PERSON": income / (inputs["family_size"] + 1),
        "CNT_FAM_MEMBERS": inputs["family_size"],
        "CNT_CHILDREN": inputs["n_children"],
        "CHILDREN_RATIO": inputs["n_children"] / (inputs["family_size"] + 1),
        "FLAG_OWN_REALTY": int(inputs["owns_realty"]),
        "FLAG_OWN_CAR": int(inputs["owns_car"]),
        "HAS_CAR_REALTY": int(inputs["owns_realty"] and inputs["owns_car"]),
        "DOCUMENT_COUNT": inputs["doc_count"],
        "TOTAL_ENQUIRIES": inputs["total_enquiries"],
        "BUREAU_COUNT": inputs["bureau_count"],
        "BUREAU_ACTIVE_COUNT": inputs["bureau_active"],
    }

    row = dict(tab_dict)
    row.update(nlp_dict)
    return pd.DataFrame([row])
147
+
148
def predict_risk(df_feat: pd.DataFrame, models: list, feature_cols: list) -> float:
    """Ensemble predict across all loaded fold models.

    Parameters
    ----------
    df_feat : single-row DataFrame of engineered applicant features.
    models : trained fold models; each must expose ``predict`` and
        ``best_iteration`` (LightGBM Booster interface).
    feature_cols : exact feature names/order the models were trained on.

    Returns
    -------
    float : mean predicted default probability across folds.
    """
    # Align to the training feature set in one shot: reindex adds any missing
    # columns filled with 0.0 and fixes the column order, WITHOUT mutating the
    # caller's frame (the previous per-column insert loop modified df_feat in
    # place and fragmented it).
    aligned = df_feat.reindex(columns=feature_cols, fill_value=0.0)
    preds = [m.predict(aligned, num_iteration=m.best_iteration) for m in models]
    return float(np.mean(preds))
157
+
158
def risk_band(score: float) -> tuple:
    """Map a default probability to (label, css class, hex colour, icon)."""
    # Band boundaries: below 0.15 is low, below 0.40 is medium, else high.
    thresholds = (
        (0.15, ("LOW RISK", "risk-low", "#4CAF50", "✅")),
        (0.40, ("MEDIUM RISK", "risk-medium", "#FF9800", "⚠️")),
    )
    for cutoff, band in thresholds:
        if score < cutoff:
            return band
    return ("HIGH RISK", "risk-high", "#F44336", "🚨")
165
+
166
def get_shap_values(model, df_feat, feature_cols):
    """Compute SHAP attributions for one fold model on an aligned frame.

    Returns ``(shap_values, expected_value, aligned_frame)`` where
    shap_values is the per-feature attribution array for the positive
    (default) class.
    """
    # Align columns without mutating the caller's frame; the previous loop
    # inserted missing columns one at a time in place.
    df_feat = df_feat.reindex(columns=feature_cols, fill_value=0.0)
    explainer = shap.TreeExplainer(model)
    sv = explainer.shap_values(df_feat)
    # Some SHAP versions return one array per class for binary classifiers;
    # keep the positive-class attribution, matching expected_value below.
    if isinstance(sv, list):
        sv = sv[1]
    base = explainer.expected_value
    if isinstance(base, list):
        base = base[1]
    return sv, base, df_feat
176
+
177
# ─── Main App ─────────────────────────────────────────────────────────────────
def main():
    """Render the Streamlit credit-scoring dashboard.

    Loads trained fold models and preprocessing artifacts (falling back to a
    heuristic demo mode when unavailable), gathers an applicant profile from
    the sidebar, scores it, and renders KPI cards plus four analysis tabs:
    score breakdown, SHAP explainability, drift simulation, feature profile.
    """
    st.markdown('<h1 class="main-header">💳 Credit Invisibility Solver</h1>', unsafe_allow_html=True)
    st.markdown(
        "<p style='text-align:center; color:#555; font-size:1.1rem;'>"
        "Alternative data ML pipeline to score the 1.7B credit-invisible population"
        "</p>", unsafe_allow_html=True
    )
    st.divider()

    # Load models; any failure drops the app into demo mode.
    try:
        models = load_models()
        pca, scaler, feature_cols = load_artifacts()
        sbert = load_sbert()
        model_loaded = len(models) > 0
    except Exception as e:
        st.error(f"⚠️ Could not load models: {e}. Running in demo mode.")
        model_loaded = False
        # BUGFIX: sbert must be reset here too — previously it was left
        # undefined when load_models()/load_artifacts() raised, and the later
        # build_single_applicant_features(inputs, pca, sbert) call crashed
        # with a NameError instead of running the demo path.
        models, pca, scaler, feature_cols, sbert = [], None, None, [], None

    # ── Sidebar ───────────────────────────────────────────────────────────────
    with st.sidebar:
        st.image("https://img.shields.io/badge/Model-LightGBM%20%2B%20XGBoost-brightgreen", use_container_width=True)
        st.markdown("### 🎛️ Applicant Profile")

        st.markdown('<div class="sidebar-section">Financial Info</div>', unsafe_allow_html=True)
        income = st.number_input("Annual Income (₹)", 10000, 10000000, 250000, step=10000)
        credit_amount = st.number_input("Requested Credit (₹)", 10000, 5000000, 500000, step=10000)
        annuity = st.number_input("Monthly Annuity (₹)", 1000, 200000, 15000, step=1000)

        st.markdown('<div class="sidebar-section">Personal Info</div>', unsafe_allow_html=True)
        age = st.slider("Age (years)", 20, 70, 35)
        employment_yrs = st.slider("Employment Years", 0, 40, 5)
        family_size = st.slider("Family Size", 1, 10, 3)
        n_children = st.slider("Number of Children", 0, 5, 0)

        st.markdown('<div class="sidebar-section">Assets</div>', unsafe_allow_html=True)
        owns_realty = st.checkbox("Owns Property", True)
        owns_car = st.checkbox("Owns Car", False)

        st.markdown('<div class="sidebar-section">Credit Bureau Signals</div>', unsafe_allow_html=True)
        ext_score_1 = st.slider("External Score 1 (Bureau)", 0.0, 1.0, 0.6, 0.01)
        ext_score_2 = st.slider("External Score 2 (Behaviour)", 0.0, 1.0, 0.55, 0.01)
        ext_score_3 = st.slider("External Score 3 (Alt Data)", 0.0, 1.0, 0.50, 0.01)
        bureau_count = st.number_input("# Previous Bureau Enquiries", 0, 50, 2)
        bureau_active = st.number_input("# Active Bureau Credits", 0, 20, 1)
        total_enquiries = st.number_input("# Total Loan Enquiries", 0, 100, 3)
        doc_count = st.number_input("# Documents Submitted", 0, 20, 5)

        predict_btn = st.button("🔮 Score Applicant", use_container_width=True, type="primary")

    # ── Main Panels ───────────────────────────────────────────────────────────
    col1, col2, col3 = st.columns(3)

    # Bundle widget values into the dict shape build_single_applicant_features expects.
    inputs = dict(
        income=income, credit_amount=credit_amount, annuity=annuity,
        age=age, employment_years=employment_yrs, family_size=family_size,
        n_children=n_children, owns_realty=owns_realty, owns_car=owns_car,
        ext_score_1=ext_score_1, ext_score_2=ext_score_2, ext_score_3=ext_score_3,
        bureau_count=bureau_count, bureau_active=bureau_active,
        total_enquiries=total_enquiries, doc_count=doc_count, has_children=n_children > 0,
    )

    if predict_btn or True:  # Show demo on load
        with st.spinner("Running ML pipeline..."):
            df_feat = build_single_applicant_features(inputs, pca, sbert)

            if model_loaded:
                risk_score = predict_risk(df_feat, models, feature_cols)
            else:
                # Demo mode — compute heuristic score
                risk_score = float(np.clip(
                    0.9 - 0.4*np.mean([ext_score_1, ext_score_2, ext_score_3])
                    - 0.1*(employment_yrs/40)
                    + 0.15*(credit_amount/income if income > 0 else 0.5)
                    + np.random.normal(0, 0.02),
                    0.01, 0.99
                ))

        label, css_class, color, icon = risk_band(risk_score)
        credit_score = int(300 + (1 - risk_score) * 550)  # map to 300-850 range

        # ── KPI Row ───────────────────────────────────────────────────────────
        col1.metric("Default Probability", f"{risk_score*100:.1f}%", delta=f"{(risk_score-0.5)*100:+.1f}% vs avg")
        col2.metric("Alt Credit Score", f"{credit_score}", delta=None)
        col3.metric("Risk Band", f"{icon} {label}", delta=None)

        st.divider()

        # ── Risk Card ─────────────────────────────────────────────────────────
        st.markdown(f'<div class="{css_class}"><b>{icon} Risk Assessment: {label}</b><br>'
                    f'Default probability: <b>{risk_score*100:.1f}%</b> | '
                    f'Alternative credit score: <b>{credit_score}/850</b></div>',
                    unsafe_allow_html=True)

        st.divider()

        # ── Tabs ──────────────────────────────────────────────────────────────
        tab1, tab2, tab3, tab4 = st.tabs(["📊 Score Breakdown", "🔍 SHAP Explainability", "📉 Drift Simulation", "📋 Feature Profile"])

        with tab1:
            c1, c2 = st.columns(2)

            # Gauge chart
            fig_gauge = go.Figure(go.Indicator(
                mode="gauge+number+delta",
                value=credit_score,
                delta={"reference": 650, "valueformat": ".0f"},
                title={"text": "Alternative Credit Score", "font": {"size": 18}},
                gauge={
                    "axis": {"range": [300, 850]},
                    "bar": {"color": color},
                    "steps": [
                        {"range": [300, 550], "color": "#FFEBEE"},
                        {"range": [550, 650], "color": "#FFF8E1"},
                        {"range": [650, 750], "color": "#E8F5E9"},
                        {"range": [750, 850], "color": "#C8E6C9"},
                    ],
                    "threshold": {"line": {"color": "red", "width": 4}, "thickness": 0.75, "value": 650},
                }
            ))
            fig_gauge.update_layout(height=280, margin=dict(t=30, b=10))
            c1.plotly_chart(fig_gauge, use_container_width=True)

            # Risk factor radar — heuristic 0..1 scores per axis.
            categories = ["External Scores", "Income Stability", "Credit Utilisation", "Payment Behaviour", "Alt Data"]
            ext_val = np.mean([ext_score_1, ext_score_2, ext_score_3])
            values = [
                ext_val,
                min(employment_yrs / 20, 1.0),
                max(0, 1 - credit_amount / (income + 1) / 3),
                ext_val * 0.9,
                min(doc_count / 10, 1.0),
            ]

            fig_radar = go.Figure(go.Scatterpolar(
                r=values + [values[0]],
                theta=categories + [categories[0]],
                # Convert the band's hex colour into a translucent rgba fill.
                fill="toself", fillcolor=f"rgba{tuple(int(color.lstrip('#')[i:i+2],16) for i in (0,2,4)) + (0.2,)}",
                line=dict(color=color, width=2),
                name="Applicant"
            ))
            fig_radar.update_layout(
                polar=dict(radialaxis=dict(visible=True, range=[0, 1])),
                height=280, margin=dict(t=30, b=10),
                title="Risk Factor Radar"
            )
            c2.plotly_chart(fig_radar, use_container_width=True)

        with tab2:
            if model_loaded and models:
                st.markdown("#### SHAP Feature Attribution")
                st.info("SHAP values show how each feature pushes the default probability up ↑ or down ↓")

                sv, base_val, df_aligned = get_shap_values(models[0], df_feat.copy(), feature_cols)

                # Sort by absolute SHAP
                shap_df = pd.DataFrame({
                    "Feature": df_aligned.columns,
                    "SHAP": sv[0],
                    "Value": df_aligned.iloc[0].values,
                }).sort_values("SHAP", key=abs, ascending=False).head(15)

                colors = ["#F44336" if v > 0 else "#4CAF50" for v in shap_df["SHAP"]]
                fig_shap = go.Figure(go.Bar(
                    x=shap_df["SHAP"], y=shap_df["Feature"],
                    orientation="h", marker_color=colors,
                    text=[f"{v:+.4f}" for v in shap_df["SHAP"]], textposition="outside",
                ))
                fig_shap.update_layout(
                    title="Top 15 SHAP Feature Contributions (Red = Increases Risk, Green = Decreases)",
                    xaxis_title="SHAP Value", height=500,
                    margin=dict(l=150)
                )
                st.plotly_chart(fig_shap, use_container_width=True)
            else:
                st.warning("⚠️ Load trained models to see SHAP explanations.")
                # Show mock
                mock_features = ["EXT_SOURCE_MEAN", "CREDIT_INCOME_RATIO", "AGE_YEARS", "EMPLOYMENT_YEARS", "BUREAU_COUNT",
                                 "EXT_SOURCE_3", "NLP_EMB_0", "ANNUITY_INCOME_RATIO", "EXT_SOURCE_1", "TOTAL_ENQUIRIES"]
                mock_shap = np.array([-0.35, 0.28, -0.18, -0.12, 0.09, -0.22, -0.08, 0.15, -0.11, 0.06])
                colors = ["#F44336" if v > 0 else "#4CAF50" for v in mock_shap]
                fig_mock = go.Figure(go.Bar(
                    x=mock_shap, y=mock_features, orientation="h",
                    marker_color=colors, text=[f"{v:+.3f}" for v in mock_shap], textposition="outside"
                ))
                fig_mock.update_layout(title="Demo SHAP (load models for real values)", height=400, margin=dict(l=200))
                st.plotly_chart(fig_mock, use_container_width=True)

        with tab3:
            st.markdown("#### Concept Drift Sensitivity Analysis")
            st.markdown("How does this applicant's risk score change under economic shocks?")

            # Re-score the applicant at progressively smaller income levels.
            income_mults = np.linspace(0.2, 1.0, 9)
            drift_scores = []
            for mult in income_mults:
                drift_inp = dict(inputs)
                drift_inp["income"] = inputs["income"] * mult
                df_d = build_single_applicant_features(drift_inp, pca, sbert)
                if model_loaded:
                    s = predict_risk(df_d, models, feature_cols)
                else:
                    s = float(np.clip(risk_score + (1 - mult) * 0.25, 0, 0.99))
                drift_scores.append(s)

            fig_drift = go.Figure()
            fig_drift.add_trace(go.Scatter(
                x=income_mults*100, y=[s*100 for s in drift_scores],
                mode="lines+markers", name="Default Probability",
                line=dict(color="#F44336", width=2.5),
                marker=dict(size=8, color=[
                    "#4CAF50" if s < 0.15 else "#FF9800" if s < 0.4 else "#F44336"
                    for s in drift_scores
                ])
            ))
            fig_drift.add_hline(y=40, line_dash="dash", line_color="orange", annotation_text="Medium Risk Threshold")
            fig_drift.add_hline(y=15, line_dash="dash", line_color="green", annotation_text="Low Risk Threshold")
            fig_drift.update_layout(
                title="Default Probability vs Income Shock Severity",
                xaxis_title="Remaining Income (%)", yaxis_title="Default Probability (%)",
                height=400
            )
            st.plotly_chart(fig_drift, use_container_width=True)

        with tab4:
            st.markdown("#### Applicant Feature Summary")
            profile_data = {
                "Feature": ["Annual Income", "Requested Credit", "Credit/Income Ratio", "Age",
                            "Employment Years", "Ext Score (Mean)", "Alt Credit Score", "Family Size"],
                "Value": [f"₹{income:,.0f}", f"₹{credit_amount:,.0f}",
                          f"{credit_amount/max(income,1):.2f}x", f"{age} yrs",
                          f"{employment_yrs} yrs", f"{np.mean([ext_score_1,ext_score_2,ext_score_3]):.3f}",
                          f"{credit_score}/850", f"{family_size} members"],
                "Status": ["✅" if income > 200000 else "⚠️",
                           "✅" if credit_amount < income*3 else "⚠️",
                           "✅" if credit_amount/max(income,1) < 2.5 else "🚨",
                           "✅", "✅" if employment_yrs > 2 else "⚠️",
                           "✅" if np.mean([ext_score_1,ext_score_2,ext_score_3]) > 0.5 else "🚨",
                           "✅" if credit_score > 650 else "⚠️", "✅"],
            }
            st.dataframe(pd.DataFrame(profile_data), use_container_width=True, hide_index=True)

    # ── Footer ────────────────────────────────────────────────────────────────
    st.divider()
    st.markdown(
        "<p style='text-align:center; font-size:0.8rem; color:#999;'>"
        "Built with LightGBM + XGBoost + Sentence-BERT + SHAP + River (ADWIN) + W&B | "
        "Home Credit Default Risk Dataset | "
        "For the 1.7B credit-invisible 🌍"
        "</p>", unsafe_allow_html=True
    )

if __name__ == "__main__":
    main()
explainable-credit-risk-modeling-with-schduling.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/.virtual_documents/__notebook_source__.ipynb ADDED
@@ -0,0 +1,1196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ── Environment setup (notebook cell) ─────────────────────────────────────────
# Install pinned dependencies through the IPython shell. NOTE: the previous
# version passed `"uv pip install \"` — a dangling backslash inside the string
# — so the package list was never part of the command; the full list is joined
# into a single shell command here.
get_ipython().getoutput(
    "uv pip install "
    "lightgbm==4.3.0 xgboost==2.0.3 optuna optuna-integration shap river wandb "
    "kaggle plotly seaborn imbalanced-learn category-encoders "
    "category_encoders==2.6.3 transformers==4.41.2 sentence-transformers==2.7.0 "
    "numpy==1.26.4 scipy==1.11.4 scikit-learn==1.4.2"
)


import transformers, sentence_transformers
print(transformers.__version__)
print(sentence_transformers.__version__)


import os, gc, warnings, json, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sn
import plotly.express as px
import plotly.graph_objects as go
warnings.filterwarnings("ignore")

# Core ML
import lightgbm as lgb
import xgboost as xgb
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, RobustScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
from imblearn.over_sampling import SMOTE
import category_encoders as ce
import optuna
from optuna.integration import LightGBMPruningCallback
optuna.logging.set_verbosity(optuna.logging.WARNING)
import shap
from river import stream, drift, linear_model, preprocessing as river_preprocessing, metrics as river_metrics, ensemble as river_ensemble
import wandb

from sentence_transformers import SentenceTransformer
import transformers


# SECURITY FIX: a live W&B API key was previously hard-coded here as the
# os.environ.get() fallback. A key committed to a public repo must be treated
# as leaked and rotated immediately. Authenticate strictly from the
# environment; fall back to anonymous mode so the notebook still runs.
_wandb_key = os.environ.get("WANDB_API_KEY")
if _wandb_key:
    wandb.login(key=_wandb_key)
else:
    wandb.login(anonymous="allow")
52
+
53
+
54
+
55
+
56
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class CFG:
    """Central experiment configuration: paths, CV setup, training budgets,
    NLP embedding settings, drift-simulation parameters, and W&B metadata."""
    # Paths
    DATA_DIR: str = "/kaggle/input/competitions/home-credit-default-risk"
    OUTPUT_DIR: str = "/kaggle/working"
    MODEL_DIR: str = "/kaggle/working/models"

    # Experiment
    EXPERIMENT_NAME: str = "credit-invisibility"
    SEED: int = 42
    N_FOLDS: int = 5

    # Training
    LGBM_N_ITER: int = 2000
    XGB_N_ITER: int = 1500
    EARLY_STOPPING: int = 100
    OPTUNA_TRIALS: int = 80

    # NLP
    SBERT_MODEL: str = "all-MiniLM-L6-v2"  # lightweight, fast
    NLP_DIM_REDUCTION: int = 32            # PCA to 32 dims

    # Drift simulation
    DRIFT_FRACTION: float = 0.15
    DRIFT_INCOME_MULTIPLIER: float = 0.4

    # W&B
    WANDB_PROJECT: str = "credit-invisibility"
    # FIX: was annotated plain `str` with a None default; Optional[str] is the
    # correct annotation for an unset entity.
    WANDB_ENTITY: Optional[str] = None  # set your username
88
+
89
# Instantiate the shared config, create the model output directory, and seed
# NumPy's global RNG so downstream sampling is reproducible across the notebook.
cfg = CFG()
os.makedirs(cfg.MODEL_DIR, exist_ok=True)
np.random.seed(cfg.SEED)
print("✅ Config loaded:", cfg)
93
+
94
+
95
def load_all_tables(data_dir: str) -> dict:
    """Read every Home Credit CSV found under `data_dir`.

    Returns a dict keyed by short table name; files that do not exist are
    reported and skipped rather than raising.
    """
    files = {
        "app_train": "application_train.csv",
        "app_test": "application_test.csv",
        "bureau": "bureau.csv",
        "bureau_balance": "bureau_balance.csv",
        "prev_app": "previous_application.csv",
        "pos_cash": "POS_CASH_balance.csv",
        "installments": "installments_payments.csv",
        "credit_card": "credit_card_balance.csv",
    }
    tables = {}
    for key, fname in files.items():
        path = os.path.join(data_dir, fname)
        if not os.path.exists(path):
            print(f" {key:20s} → NOT FOUND")
            continue
        tables[key] = pd.read_csv(path)
        print(f" ✅ {key:20s} → {tables[key].shape}")
    return tables
115
+
116
# Load every available table once, keep handy references to the two
# application frames, and print the class balance so the heavy target
# imbalance (non-default vs default) is visible before modelling.
print("📂 Loading all Home Credit tables...")
tables = load_all_tables(cfg.DATA_DIR)

# Quick snapshot
train = tables["app_train"]
test = tables["app_test"]

print(f"\n Train shape: {train.shape}")
print(f" Test shape: {test.shape}")
print(f"\n Target distribution:\n{train['TARGET'].value_counts(normalize=True).round(3)}")
print(f"\n Class imbalance ratio: {train['TARGET'].value_counts()[0]/train['TARGET'].value_counts()[1]:.1f}:1")
127
+
128
+
129
# ── EDA overview: 2x3 grid of summary plots saved to eda_overview.png ────────
# All panels draw into the shared `axes` grid; order matters only for layout.
fig, axes = plt.subplots(2, 3, figsize=(18, 10))
fig.suptitle("Home Credit — Exploratory Data Analysis", fontsize=16, fontweight="bold")

# 1. Target distribution
axes[0,0].pie(
    train["TARGET"].value_counts(),
    labels=["Non-Default (0)", "Default (1)"],
    autopct="%1.1f%%", colors=["#2196F3","#F44336"], startangle=90
)
axes[0,0].set_title("Target Distribution")

# 2. Income distribution by target (clipped at 500k to tame the long tail)
for t, color in zip([0, 1], ["#2196F3", "#F44336"]):
    subset = train[train["TARGET"] == t]["AMT_INCOME_TOTAL"].clip(0, 500000)
    axes[0,1].hist(subset, bins=50, alpha=0.6, label=f"Target={t}", color=color)
axes[0,1].set_title("Income Distribution by Target")
axes[0,1].legend()
axes[0,1].set_xlabel("Annual Income")

# 3. Credit amount by target (clipped at 2M)
for t, color in zip([0, 1], ["#2196F3", "#F44336"]):
    subset = train[train["TARGET"] == t]["AMT_CREDIT"].clip(0, 2000000)
    axes[0,2].hist(subset, bins=50, alpha=0.6, label=f"Target={t}", color=color)
axes[0,2].set_title("Credit Amount by Target")
axes[0,2].legend()

# 4. Missing value heatmap (top 20 columns by missing percentage)
missing = (train.isnull().sum() / len(train) * 100).sort_values(ascending=False).head(20)
axes[1,0].barh(missing.index, missing.values, color="#FF9800")
axes[1,0].set_title("Top 20 Missing Value %")
axes[1,0].set_xlabel("Missing %")

# 5. Age distribution (DAYS_BIRTH is negative days; converted to years)
age_years = train["DAYS_BIRTH"].abs() / 365
for t, color in zip([0, 1], ["#2196F3", "#F44336"]):
    subset = age_years[train["TARGET"] == t]
    axes[1,1].hist(subset, bins=40, alpha=0.6, label=f"Target={t}", color=color)
axes[1,1].set_title("Age Distribution by Target")
axes[1,1].set_xlabel("Age (years)")
axes[1,1].legend()

# 6. Occupation type default rate
occ_default = train.groupby("OCCUPATION_TYPE")["TARGET"].mean().sort_values(ascending=False)
axes[1,2].barh(occ_default.index, occ_default.values, color="#9C27B0")
axes[1,2].set_title("Default Rate by Occupation")
axes[1,2].set_xlabel("Default Rate")

plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/eda_overview.png", dpi=150, bbox_inches="tight")
plt.show()
print("✅ EDA plots saved")
180
+
181
+
182
def engineer_bureau_features(bureau: pd.DataFrame, bureau_balance: pd.DataFrame) -> pd.DataFrame:
    """Extract rich signals from external credit bureau data.

    Aggregates the monthly `bureau_balance` STATUS history per bureau record,
    merges it onto `bureau`, derives debt/overdue ratios, and reduces to one
    row per SK_ID_CURR. Returns a frame of BUREAU_*-prefixed aggregates plus
    record counts. NOTE: helper columns are written onto `bureau` in place.
    """

    # Bureau balance — rolling DPD (days past due) features.
    # STATUS codes 1-5 are increasing delinquency; "C" (closed) and "X"
    # (unknown) are mapped to 0 so they count as non-delinquent.
    bb_agg = bureau_balance.groupby("SK_ID_BUREAU").agg(
        STATUS_WORST = ("STATUS", lambda x: x.map({"C":0,"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,"X":0}).max()),
        STATUS_MEAN = ("STATUS", lambda x: x.map({"C":0,"0":0,"1":1,"2":2,"3":3,"4":4,"5":5,"X":0}).mean()),
        MONTHS_COUNT = ("MONTHS_BALANCE", "count"),
    ).reset_index()

    bureau = bureau.merge(bb_agg, on="SK_ID_BUREAU", how="left")

    # Active vs closed credit; +1 denominators avoid division by zero.
    bureau["CREDIT_ACTIVE_BINARY"] = (bureau["CREDIT_ACTIVE"] == "Active").astype(int)
    bureau["DAYS_CREDIT_ENDDATE"] = bureau["DAYS_CREDIT_ENDDATE"].clip(-3000, 3000)
    bureau["DEBT_CREDIT_RATIO"] = bureau["AMT_CREDIT_SUM_DEBT"] / (bureau["AMT_CREDIT_SUM"] + 1)
    bureau["CREDIT_UTIL_RATE"] = bureau["AMT_CREDIT_SUM_OVERDUE"] / (bureau["AMT_CREDIT_SUM"] + 1)

    # Per-client aggregation spec; column order here fixes the output order,
    # which downstream feature lists depend on.
    aggregations = {
        "DAYS_CREDIT": ["mean","min","max","std"],
        "CREDIT_DAY_OVERDUE": ["mean","max","sum"],
        "DAYS_CREDIT_ENDDATE": ["mean","min","max"],
        "AMT_CREDIT_SUM": ["mean","max","sum"],
        "AMT_CREDIT_SUM_DEBT": ["mean","max","sum"],
        "AMT_CREDIT_SUM_OVERDUE": ["mean","max","sum"],
        "DEBT_CREDIT_RATIO": ["mean","max"],
        "CREDIT_UTIL_RATE": ["mean","max"],
        "CREDIT_ACTIVE_BINARY": ["mean","sum"],
        "STATUS_WORST": ["mean","max"],
        "STATUS_MEAN": ["mean"],
        "MONTHS_COUNT": ["mean","sum"],
        "CNT_CREDIT_PROLONG": ["sum","mean"],
    }

    bureau_agg = bureau.groupby("SK_ID_CURR").agg(aggregations)
    # Flatten the (column, statistic) MultiIndex into BUREAU_<COL>_<STAT>.
    bureau_agg.columns = ["BUREAU_" + "_".join(col).upper() for col in bureau_agg.columns]

    # Count of bureau records (signal for credit footprint)
    bureau_agg["BUREAU_COUNT"] = bureau.groupby("SK_ID_CURR").size()
    bureau_agg["BUREAU_ACTIVE_COUNT"] = bureau.groupby("SK_ID_CURR")["CREDIT_ACTIVE_BINARY"].sum()

    return bureau_agg.reset_index()

bureau_features = engineer_bureau_features(tables["bureau"], tables["bureau_balance"])
print(f"✅ Bureau features: {bureau_features.shape}")
227
+
228
+
229
def engineer_prev_app_features(prev: pd.DataFrame) -> pd.DataFrame:
    """Aggregate previous Home Credit applications to one row per SK_ID_CURR.

    Adds ratio/outcome helper columns onto `prev` in place, then reduces to
    approval/refusal rates and amount statistics.
    """
    # +1 denominators guard against zero credit amounts.
    prev["APP_CREDIT_RATIO"] = prev["AMT_APPLICATION"] / (prev["AMT_CREDIT"] + 1)
    prev["DOWN_PAYMENT_RATIO"] = prev["AMT_DOWN_PAYMENT"] / (prev["AMT_CREDIT"] + 1)
    prev["ANNUITY_CREDIT_RATIO"] = prev["AMT_ANNUITY"] / (prev["AMT_CREDIT"] + 1)
    prev["APPROVED"] = (prev["NAME_CONTRACT_STATUS"] == "Approved").astype(int)
    prev["REFUSED"] = (prev["NAME_CONTRACT_STATUS"] == "Refused").astype(int)

    agg = prev.groupby("SK_ID_CURR").agg(
        PREV_COUNT = ("SK_ID_PREV", "count"),
        PREV_APPROVED_RATE = ("APPROVED", "mean"),
        PREV_REFUSED_RATE = ("REFUSED", "mean"),
        PREV_APP_CREDIT_RATIO_MEAN= ("APP_CREDIT_RATIO", "mean"),
        PREV_ANNUITY_MEAN = ("AMT_ANNUITY", "mean"),
        PREV_CREDIT_MEAN = ("AMT_CREDIT", "mean"),
        PREV_DAYS_DECISION_MEAN = ("DAYS_DECISION", "mean"),
        PREV_DAYS_DECISION_MIN = ("DAYS_DECISION", "min"),
        PREV_GOODS_PRICE_MEAN = ("AMT_GOODS_PRICE", "mean"),
    ).reset_index()
    # This prefixing is currently a no-op (every aggregate above already
    # starts with "PREV"); kept as a safety net for future additions.
    agg.columns = ["SK_ID_CURR"] + ["PREV_" + c if not c.startswith("PREV") else c for c in agg.columns[1:]]
    return agg
249
+
250
def engineer_installments_features(inst: pd.DataFrame) -> pd.DataFrame:
    """Summarise repayment history (underpayment / lateness) per SK_ID_CURR.

    Helper columns are written onto `inst` in place, matching the original
    behaviour, before aggregating to one row per client.
    """
    inst["PAYMENT_DIFF"] = inst["AMT_INSTALMENT"] - inst["AMT_PAYMENT"]
    inst["DAYS_ENTRY_DIFF"] = inst["DAYS_INSTALMENT"] - inst["DAYS_ENTRY_PAYMENT"]
    inst["LATE_PAYMENT"] = (inst["DAYS_ENTRY_DIFF"] > 0).astype(int)
    inst["SHORT_PAYMENT"] = (inst["PAYMENT_DIFF"] > 0).astype(int)

    spec = dict(
        INST_PAYMENT_DIFF_MEAN=("PAYMENT_DIFF", "mean"),
        INST_PAYMENT_DIFF_MAX=("PAYMENT_DIFF", "max"),
        INST_DAYS_ENTRY_DIFF_MEAN=("DAYS_ENTRY_DIFF", "mean"),
        INST_LATE_PAYMENT_RATE=("LATE_PAYMENT", "mean"),
        INST_SHORT_PAYMENT_RATE=("SHORT_PAYMENT", "mean"),
        INST_COUNT=("SK_ID_PREV", "count"),
    )
    return inst.groupby("SK_ID_CURR").agg(**spec).reset_index()
265
+
266
def engineer_pos_cash_features(pos: pd.DataFrame) -> pd.DataFrame:
    """Summarise POS/cash-loan monthly balances per SK_ID_CURR: tenure and
    days-past-due statistics. Writes DPD_BINARY onto `pos` in place."""
    pos["DPD_BINARY"] = (pos["SK_DPD"] > 0).astype(int)
    grouped = pos.groupby("SK_ID_CURR")
    return grouped.agg(
        POS_MONTHS_COUNT=("MONTHS_BALANCE", "count"),
        POS_SK_DPD_MEAN=("SK_DPD", "mean"),
        POS_SK_DPD_MAX=("SK_DPD", "max"),
        POS_DPD_RATE=("DPD_BINARY", "mean"),
        POS_CNT_INSTALMENT_MEAN=("CNT_INSTALMENT", "mean"),
    ).reset_index()
276
+
277
def engineer_credit_card_features(cc: pd.DataFrame) -> pd.DataFrame:
    """Summarise credit-card monthly balances per SK_ID_CURR: utilisation,
    drawing intensity, balances and DPD. Writes rate columns onto `cc`."""
    # +1 keeps the ratios finite when the reported limit is zero.
    limit_plus_one = cc["AMT_CREDIT_LIMIT_ACTUAL"] + 1
    cc["UTIL_RATE"] = cc["AMT_BALANCE"] / limit_plus_one
    cc["DRAWING_RATE"] = cc["AMT_DRAWINGS_CURRENT"] / limit_plus_one
    return (
        cc.groupby("SK_ID_CURR")
        .agg(
            CC_UTIL_RATE_MEAN=("UTIL_RATE", "mean"),
            CC_UTIL_RATE_MAX=("UTIL_RATE", "max"),
            CC_DRAWING_RATE_MEAN=("DRAWING_RATE", "mean"),
            CC_AMT_BALANCE_MEAN=("AMT_BALANCE", "mean"),
            CC_COUNT=("SK_ID_PREV", "count"),
            CC_DPD_MEAN=("SK_DPD", "mean"),
        )
        .reset_index()
    )
289
+
290
# Materialise each auxiliary-table feature set, keyed by SK_ID_CURR; shapes
# are printed as a sanity check before merging onto the application frame.
prev_features = engineer_prev_app_features(tables["prev_app"])
inst_features = engineer_installments_features(tables["installments"])
pos_features = engineer_pos_cash_features(tables["pos_cash"])
cc_features = engineer_credit_card_features(tables["credit_card"])

print(f"✅ Prev app features: {prev_features.shape}")
print(f"✅ Installments features:{inst_features.shape}")
print(f"✅ POS Cash features: {pos_features.shape}")
print(f"✅ Credit Card features:{cc_features.shape}")
299
+
300
+
301
def engineer_app_features(df: pd.DataFrame) -> pd.DataFrame:
    """Add domain-ratio, age/employment, family, external-score, document,
    enquiry and ownership features to an application frame, then label-encode
    every object column. Works on a copy; the input frame is not mutated.

    NOTE(review): LabelEncoder is re-fit independently on each frame passed in
    (train and test are encoded in separate calls below). Codes only line up
    across frames when both contain identical category sets — confirm, or fit
    shared encoders on the union of categories.
    """
    df = df.copy()

    # Domain knowledge ratios (+1 denominators avoid division by zero)
    df["CREDIT_INCOME_RATIO"] = df["AMT_CREDIT"] / (df["AMT_INCOME_TOTAL"] + 1)
    df["ANNUITY_INCOME_RATIO"] = df["AMT_ANNUITY"] / (df["AMT_INCOME_TOTAL"] + 1)
    df["CREDIT_TERM"] = df["AMT_ANNUITY"] / (df["AMT_CREDIT"] + 1)
    df["GOODS_CREDIT_RATIO"] = df["AMT_GOODS_PRICE"] / (df["AMT_CREDIT"] + 1)

    # Age and employment signals. DAYS_* columns are negative day counts;
    # positive DAYS_EMPLOYED (the dataset's 365243 sentinel) maps to 0 years.
    df["AGE_YEARS"] = df["DAYS_BIRTH"].abs() / 365.25
    df["EMPLOYMENT_YEARS"] = df["DAYS_EMPLOYED"].apply(lambda x: abs(x)/365.25 if x < 0 else 0)
    df["EMPLOYED_RATIO"] = df["EMPLOYMENT_YEARS"] / (df["AGE_YEARS"] + 1)
    df["CREDIT_TO_AGE"] = df["AMT_CREDIT"] / (df["AGE_YEARS"] + 1)

    # Family / social signals
    df["INCOME_PER_PERSON"] = df["AMT_INCOME_TOTAL"] / (df["CNT_FAM_MEMBERS"] + 1)
    df["CHILDREN_RATIO"] = df["CNT_CHILDREN"] / (df["CNT_FAM_MEMBERS"] + 1)

    # External scores (most predictive in Home Credit baseline)
    df["EXT_SOURCE_MEAN"] = df[["EXT_SOURCE_1","EXT_SOURCE_2","EXT_SOURCE_3"]].mean(axis=1)
    df["EXT_SOURCE_MIN"] = df[["EXT_SOURCE_1","EXT_SOURCE_2","EXT_SOURCE_3"]].min(axis=1)
    df["EXT_SOURCE_PROD"] = df["EXT_SOURCE_1"] * df["EXT_SOURCE_2"] * df["EXT_SOURCE_3"]
    df["EXT_SOURCE_STD"] = df[["EXT_SOURCE_1","EXT_SOURCE_2","EXT_SOURCE_3"]].std(axis=1)
    df["EXT1_EXT2_INTERACTION"] = df["EXT_SOURCE_1"] * df["EXT_SOURCE_2"]
    df["EXT2_EXT3_INTERACTION"] = df["EXT_SOURCE_2"] * df["EXT_SOURCE_3"]
    df["EXT_CREDIT_RATIO"] = df["EXT_SOURCE_MEAN"] * df["CREDIT_INCOME_RATIO"]

    # Document flags — count submitted documents
    doc_cols = [c for c in df.columns if "FLAG_DOCUMENT" in c]
    df["DOCUMENT_COUNT"] = df[doc_cols].sum(axis=1)

    # Enquiry signals (loan shopping behavior); .get() tolerates the weekly
    # column being absent.
    enq_cols = [c for c in df.columns if "AMT_REQ_CREDIT_BUREAU" in c]
    df["TOTAL_ENQUIRIES"] = df[enq_cols].sum(axis=1)
    df["RECENT_ENQUIRY_RATIO"] = df.get("AMT_REQ_CREDIT_BUREAU_WEEK", pd.Series(0, index=df.index)) / (df["TOTAL_ENQUIRIES"] + 1)

    # Car & realty
    df["HAS_CAR_REALTY"] = ((df["FLAG_OWN_CAR"] == "Y") & (df["FLAG_OWN_REALTY"] == "Y")).astype(int)

    # Days registration relative to application
    df["DAYS_REGISTRATION_RATIO"] = df["DAYS_REGISTRATION"] / (df["DAYS_BIRTH"] + 1)

    # Label encode categoricals (see NOTE(review) in the docstring about
    # per-frame re-fitting).
    cat_cols = df.select_dtypes("object").columns.tolist()
    le = LabelEncoder()
    for col in cat_cols:
        df[col] = df[col].fillna("Unknown")
        df[col] = le.fit_transform(df[col].astype(str))

    return df

train_eng = engineer_app_features(train)
test_eng = engineer_app_features(test)
print(f"✅ Engineered features: {train_eng.shape}")
356
+
357
+
358
def generate_financial_descriptions(df: pd.DataFrame) -> list:
    """
    Synthesize a financial narrative per applicant from tabular data.
    This simulates what a financial literacy assessment text would contain.
    """

    def _literacy_phrase(ext_score):
        # Map the mean external score onto a financial-literacy phrasing.
        if ext_score > 0.65:
            return "demonstrates strong financial planning habits and consistently pays bills on time"
        if ext_score > 0.45:
            return "shows moderate financial awareness with occasional late payments"
        return "has limited financial experience and irregular payment patterns"

    def _describe(row):
        # .get() defaults keep the narrative well-formed when columns are absent.
        age = abs(row.get("DAYS_BIRTH", -365 * 35)) / 365
        income = row.get("AMT_INCOME_TOTAL", 100000)
        credit = row.get("AMT_CREDIT", 200000)
        ext_score = row.get("EXT_SOURCE_MEAN", 0.5)
        emp_years = max(0, -row.get("DAYS_EMPLOYED", -5 * 365)) / 365
        literacy = _literacy_phrase(ext_score)
        return (
            f"Applicant aged {age:.0f} years with annual income of {income:.0f} currency units. "
            f"Requesting credit of {credit:.0f} for personal needs. "
            f"Employed for {emp_years:.1f} years in current position. "
            f"Client {literacy}. "
            f"External credit assessment score: {ext_score:.2f}. "
            f"{'Owns property which serves as collateral.' if row.get('FLAG_OWN_REALTY', 0) else 'No property ownership.'} "
            f"{'Has dependents in household.' if row.get('CNT_CHILDREN', 0) > 0 else 'No children.'}"
        )

    return [_describe(row) for _, row in df.iterrows()]
390
+
391
# ── NLP features: narratives → SBERT embeddings → PCA ───────────────
print("🔤 Generating financial narratives...")
train_texts = generate_financial_descriptions(train_eng)
test_texts = generate_financial_descriptions(test_eng)
print(f" Sample: {train_texts[0][:120]}...")

print("\n🤖 Loading SBERT model...")
sbert = SentenceTransformer(cfg.SBERT_MODEL)

print(" Encoding train texts (batch)...")
train_embeddings = sbert.encode(
    train_texts,
    batch_size=512,
    show_progress_bar=True,
    normalize_embeddings=True,
    convert_to_numpy=True,
)

print(" Encoding test texts (batch)...")
test_embeddings = sbert.encode(
    test_texts,
    batch_size=512,
    show_progress_bar=True,
    normalize_embeddings=True,
    convert_to_numpy=True,
)

print(f"\n✅ Embeddings shape: {train_embeddings.shape}")

# Compress the embeddings with PCA fitted on train only (no test leakage).
from sklearn.decomposition import PCA
pca = PCA(n_components=cfg.NLP_DIM_REDUCTION, random_state=cfg.SEED)
train_emb_reduced = pca.fit_transform(train_embeddings)
test_emb_reduced = pca.transform(test_embeddings)

print(f"✅ After PCA: {train_emb_reduced.shape} | Explained variance: {pca.explained_variance_ratio_.sum():.3f}")

# Wrap the reduced embeddings as DataFrames aligned with the engineered frames.
emb_cols = [f"NLP_EMB_{i}" for i in range(cfg.NLP_DIM_REDUCTION)]
train_nlp_df = pd.DataFrame(train_emb_reduced, columns=emb_cols, index=train_eng.index)
test_nlp_df = pd.DataFrame(test_emb_reduced, columns=emb_cols, index=test_eng.index)

# Free the encoder and raw embeddings; only the reduced frames are kept.
del sbert, train_embeddings, test_embeddings
gc.collect()
print("✅ NLP features ready")
429
+
430
+
431
def merge_all_features(app_df, bureau_feat, prev_feat, inst_feat, pos_feat, cc_feat, nlp_feat):
    """Left-join every aggregated feature table onto the application frame,
    then append the positionally aligned NLP embedding columns.

    All keyed tables join on SK_ID_CURR; nlp_feat is concatenated by row
    position (its index is reset), so it must be row-aligned with app_df.
    """
    merged = app_df.copy()
    for feature_table in (bureau_feat, prev_feat, inst_feat, pos_feat, cc_feat):
        merged = merged.merge(feature_table, on="SK_ID_CURR", how="left")

    # NLP embeddings carry no SK_ID_CURR; align purely by row order.
    merged = merged.reset_index(drop=True)
    return pd.concat([merged, nlp_feat.reset_index(drop=True)], axis=1)
445
+
446
# ── Assemble the final matrices and split off the target ────────────
print("🔗 Merging all feature tables...")
train_full = merge_all_features(train_eng, bureau_features, prev_features, inst_features, pos_features, cc_features, train_nlp_df)
test_full = merge_all_features(test_eng, bureau_features, prev_features, inst_features, pos_features, cc_features, test_nlp_df)

print(f"✅ Final train shape: {train_full.shape}")
print(f"✅ Final test shape: {test_full.shape}")

TARGET = "TARGET"
DROP_COLS = ["TARGET", "SK_ID_CURR"]
FEATURE_COLS = [c for c in train_full.columns if c not in DROP_COLS]

X = train_full[FEATURE_COLS]
y = train_full[TARGET]
X_test_final = test_full[FEATURE_COLS]

print(f"\n✅ X: {X.shape} | y: {y.shape}")
n_nlp = sum(1 for c in FEATURE_COLS if "NLP" in c)
print(f" NLP features: {n_nlp} | Tabular: {len(FEATURE_COLS) - n_nlp}")
464
+
465
+
466
def run_lgbm_baseline(X, y, X_test, cfg, params=None):
    """Train a 5-fold stratified-CV LightGBM baseline with W&B logging.

    Parameters
    ----------
    X, y : pandas training features and binary target.
    X_test : pandas test features, scored with the fold-averaged models.
    cfg : experiment config providing N_FOLDS, LGBM_N_ITER, EARLY_STOPPING,
          SEED and WANDB_PROJECT.
    params : optional dict of LightGBM parameter overrides merged onto the
             defaults below.

    Returns
    -------
    (models, oof_preds, test_preds, oof_auc, feature_importance_df)
    """
    run = wandb.init(
        project=cfg.WANDB_PROJECT,
        name="lgbm-baseline",
        config=params or {},
        tags=["baseline", "lgbm"],
    )

    default_params = {
        "objective": "binary",
        "metric": "auc",
        "boosting_type": "gbdt",
        "num_leaves": 127,
        "learning_rate": 0.05,
        "feature_fraction": 0.85,
        "bagging_fraction": 0.85,
        "bagging_freq": 5,
        "min_child_samples": 20,
        "reg_alpha": 0.1,
        "reg_lambda": 0.1,
        "n_jobs": -1,
        "seed": cfg.SEED,
        "verbose": -1,
    }
    if params:
        default_params.update(params)

    skf = StratifiedKFold(n_splits=cfg.N_FOLDS, shuffle=True, random_state=cfg.SEED)
    oof_preds = np.zeros(len(X))
    test_preds = np.zeros(len(X_test))
    fold_scores = []
    models = []

    for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
        X_trn, X_val = X.iloc[trn_idx], X.iloc[val_idx]
        y_trn, y_val = y.iloc[trn_idx], y.iloc[val_idx]

        # LightGBM wants float labels; a single float32 cast suffices
        # (the previous version cast twice and kept dead commented code).
        dtrain = lgb.Dataset(X_trn, label=np.asarray(y_trn, dtype=np.float32))
        dval = lgb.Dataset(X_val, label=np.asarray(y_val, dtype=np.float32), reference=dtrain)

        model = lgb.train(
            default_params,
            dtrain,
            num_boost_round=cfg.LGBM_N_ITER,
            valid_sets=[dval],
            callbacks=[
                lgb.early_stopping(cfg.EARLY_STOPPING, verbose=False),
                lgb.log_evaluation(200),
            ],
        )

        # OOF predictions at the early-stopped best iteration; test preds
        # are averaged across folds.
        oof_preds[val_idx] = model.predict(X_val, num_iteration=model.best_iteration)
        test_preds += model.predict(X_test, num_iteration=model.best_iteration) / cfg.N_FOLDS

        score = roc_auc_score(y_val, oof_preds[val_idx])
        fold_scores.append(score)
        models.append(model)

        wandb.log({f"fold_{fold+1}_auc": score, "fold": fold + 1})
        print(f" Fold {fold+1} | AUC: {score:.5f} | Best iter: {model.best_iteration}")

    oof_auc = roc_auc_score(y, oof_preds)
    print(f"\n🏆 OOF AUC: {oof_auc:.5f} ± {np.std(fold_scores):.5f}")

    wandb.log({
        "oof_auc": oof_auc,
        "fold_std": np.std(fold_scores),
        "n_features": X.shape[1],
        "n_train": len(X),
    })

    # Gain-based feature importance averaged over all fold models.
    fi = pd.DataFrame({
        "feature": X.columns,
        "importance": np.mean([m.feature_importance("gain") for m in models], axis=0),
    }).sort_values("importance", ascending=False)

    wandb.log({"feature_importance": wandb.Table(dataframe=fi.head(20))})

    run.finish()
    return models, oof_preds, test_preds, oof_auc, fi
558
+
559
# Untuned baseline run: establishes the reference OOF AUC and importances.
print("🚀 Running LightGBM Baseline...")
lgbm_models, lgbm_oof, lgbm_test, lgbm_auc, feat_imp = run_lgbm_baseline(X, y, X_test_final, cfg)
561
+
562
+
563
def objective_lgbm(trial, X, y, cfg):
    """Optuna objective for LightGBM: mean AUC over a 3-fold stratified CV.

    Trains on GPU with Optuna pruning via LightGBMPruningCallback.
    NOTE(review): early stopping has no effect when boosting_type="dart"
    (LightGBM cannot roll back dart iterations) — confirm that including
    "dart" in the search space is intended.

    Returns the mean validation AUC for the sampled configuration.
    """
    params = {
        "objective": "binary",
        "metric": "auc",
        "verbosity": -1,
        # GPU settings
        "device": "gpu",
        "gpu_platform_id": 0,
        "gpu_device_id": 0,
        # Important for GPU stability
        "max_bin": 255,
        "gpu_use_dp": False,
        "force_col_wise": True,
        # Search space
        "boosting_type": trial.suggest_categorical("boosting_type", ["gbdt", "dart"]),
        "num_leaves": trial.suggest_int("num_leaves", 31, 128),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "feature_fraction": trial.suggest_float("feature_fraction", 0.6, 1.0),
        "bagging_fraction": trial.suggest_float("bagging_fraction", 0.6, 1.0),
        "bagging_freq": trial.suggest_int("bagging_freq", 1, 7),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 100),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-4, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-4, 10.0, log=True),
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0.0, 0.5),
        "n_jobs": -1,
        "seed": cfg.SEED,
    }

    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=cfg.SEED)
    scores = []

    for trn_idx, val_idx in skf.split(X, y):
        X_trn, X_val = X.iloc[trn_idx], X.iloc[val_idx]
        y_trn, y_val = y.iloc[trn_idx], y.iloc[val_idx]

        # Single float32 cast of the labels (previous version kept dead
        # commented-out Dataset construction alongside the live one).
        dtrain = lgb.Dataset(X_trn, label=np.asarray(y_trn, dtype=np.float32))
        dval = lgb.Dataset(X_val, label=np.asarray(y_val, dtype=np.float32), reference=dtrain)

        model = lgb.train(
            params,
            dtrain,
            num_boost_round=1000,
            valid_sets=[dval],
            callbacks=[
                lgb.early_stopping(50, verbose=False),
                lgb.log_evaluation(-1),
                LightGBMPruningCallback(trial, "auc"),
            ],
        )
        preds = model.predict(X_val, num_iteration=model.best_iteration)
        scores.append(roc_auc_score(y_val, preds))

    return np.mean(scores)
631
+
632
# ── LightGBM hyper-parameter search ─────────────────────────────────
print("🔍 Running Optuna HPO for LightGBM...")
print(f" Trials: {cfg.OPTUNA_TRIALS}")

study_lgbm = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=cfg.SEED),
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),
    study_name="lgbm-credit-hpo",
)

study_lgbm.optimize(
    lambda trial: objective_lgbm(trial, X, y, cfg),
    n_trials=cfg.OPTUNA_TRIALS,
    show_progress_bar=True,
    n_jobs=1,
)

# Re-attach the fixed (non-searched) parameters to the best trial's values.
best_lgbm_params = study_lgbm.best_params
best_lgbm_params.update({
    "objective": "binary",
    "metric": "auc",
    "verbosity": -1,
    "n_jobs": -1,
    "seed": cfg.SEED,
})

print(f"\n🏆 Best LightGBM AUC: {study_lgbm.best_value:.5f}")
print(f" Best params: {json.dumps(best_lgbm_params, indent=2)}")
654
+
655
+
656
+
657
+
658
def objective_xgb(trial, X, y, cfg):
    """Optuna objective for XGBoost: mean AUC over a 3-fold stratified CV.

    Uses the GPU histogram builder (tree_method="hist" + device="cuda").
    Fixes from review: the params dict previously declared
    "tree_method": "hist" TWICE (the second silently shadowed the first)
    and passed "use_label_encoder", which is a scikit-learn-wrapper-only
    option that the native xgb.train API rejects with a warning.

    Returns the mean validation AUC for the sampled configuration.
    """
    params = {
        "objective": "binary:logistic",
        "eval_metric": "auc",
        # GPU histogram builder.
        "tree_method": "hist",
        "device": "cuda",
        "max_bin": 256,  # smaller histograms are important for GPU speed
        # Search space
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
        "max_depth": trial.suggest_int("max_depth", 4, 10),
        "min_child_weight": trial.suggest_int("min_child_weight", 1, 20),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "gamma": trial.suggest_float("gamma", 0.0, 2.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-4, 10.0, log=True),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-4, 10.0, log=True),
        "scale_pos_weight": trial.suggest_float("scale_pos_weight", 1.0, 10.0),
        "seed": cfg.SEED,
        "n_jobs": -1,
    }

    skf = StratifiedKFold(n_splits=3, shuffle=True, random_state=cfg.SEED)
    scores = []

    for trn_idx, val_idx in skf.split(X, y):
        X_trn, X_val = X.iloc[trn_idx], X.iloc[val_idx]
        y_trn, y_val = y.iloc[trn_idx], y.iloc[val_idx]

        dtrain = xgb.DMatrix(X_trn, label=np.asarray(y_trn, dtype=np.float32))
        dval = xgb.DMatrix(X_val, label=np.asarray(y_val, dtype=np.float32))

        model = xgb.train(
            params,
            dtrain,
            num_boost_round=1000,
            evals=[(dval, "val")],
            early_stopping_rounds=50,
            verbose_eval=False,
        )
        scores.append(roc_auc_score(y_val, model.predict(dval)))

    return np.mean(scores)
702
+
703
# ── XGBoost hyper-parameter search ──────────────────────────────────
print("🔍 Running Optuna HPO for XGBoost...")

study_xgb = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(seed=cfg.SEED),
    study_name="xgb-credit-hpo",
)
study_xgb.optimize(
    lambda trial: objective_xgb(trial, X, y, cfg),
    n_trials=cfg.OPTUNA_TRIALS,
    show_progress_bar=True,
)

# Re-attach the fixed (non-searched) parameters to the best trial's values.
best_xgb_params = study_xgb.best_params
best_xgb_params.update({
    "objective": "binary:logistic",
    "eval_metric": "auc",
    "tree_method": "hist",
    "seed": cfg.SEED,
    "n_jobs": -1,
})

print(f"\n🏆 Best XGBoost AUC: {study_xgb.best_value:.5f}")
719
+
720
+
721
def train_full_ensemble(X, y, X_test, lgbm_params, xgb_params, cfg):
    """Train the final cross-validated LightGBM + XGBoost ensemble.

    Both parameter dicts are updated in place with GPU settings (as before),
    each fold trains one model per library, and the blend weight is found by
    a 0.01-step grid search on the out-of-fold predictions.

    Returns (lgbm_models, xgb_models, oof_lgbm, oof_xgb, oof_blend, test_blend).
    """
    # Force GPU execution for both libraries.
    lgbm_params.update({
        "device": "gpu",
        "max_bin": 255,
        "gpu_use_dp": False,
        "force_col_wise": True,
    })
    xgb_params.update({
        "device": "cuda",
        "tree_method": "hist",
        "max_bin": 256,
    })

    run = wandb.init(
        project=cfg.WANDB_PROJECT,
        name="ensemble-lgbm-xgb",
        config={"lgbm": lgbm_params, "xgb": xgb_params, "n_folds": cfg.N_FOLDS},
        tags=["ensemble", "final"],
    )

    skf = StratifiedKFold(n_splits=cfg.N_FOLDS, shuffle=True, random_state=cfg.SEED)

    oof_lgbm = np.zeros(len(X))
    oof_xgb = np.zeros(len(X))
    test_lgbm = np.zeros(len(X_test))
    test_xgb = np.zeros(len(X_test))
    lgbm_models_list, xgb_models_list = [], []

    for fold, (trn_idx, val_idx) in enumerate(skf.split(X, y)):
        print(f"\n🚀 Fold {fold+1}")

        X_trn, X_val = X.iloc[trn_idx], X.iloc[val_idx]
        y_val = y.iloc[val_idx]
        y_trn_np = np.asarray(y.iloc[trn_idx], dtype=np.float32)
        y_val_np = np.asarray(y_val, dtype=np.float32)

        # --- LightGBM fold model ---------------------------------------
        dl_trn = lgb.Dataset(X_trn, label=y_trn_np)
        dl_val = lgb.Dataset(X_val, label=y_val_np, reference=dl_trn)
        lgb_model = lgb.train(
            lgbm_params,
            dl_trn,
            num_boost_round=cfg.LGBM_N_ITER,
            valid_sets=[dl_val],
            callbacks=[
                lgb.early_stopping(cfg.EARLY_STOPPING, verbose=False),
                lgb.log_evaluation(100),
            ],
        )
        lgb_val_pred = lgb_model.predict(X_val, num_iteration=lgb_model.best_iteration)
        oof_lgbm[val_idx] = lgb_val_pred
        test_lgbm += lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration) / cfg.N_FOLDS
        lgbm_models_list.append(lgb_model)

        # --- XGBoost fold model ----------------------------------------
        dx_trn = xgb.DMatrix(X_trn, label=y_trn_np)
        dx_val = xgb.DMatrix(X_val, label=y_val_np)
        dx_tst = xgb.DMatrix(X_test)
        xgb_model = xgb.train(
            xgb_params,
            dx_trn,
            num_boost_round=cfg.XGB_N_ITER,
            evals=[(dx_val, "val")],
            early_stopping_rounds=cfg.EARLY_STOPPING,
            verbose_eval=False,
        )
        xgb_val_pred = xgb_model.predict(dx_val)
        oof_xgb[val_idx] = xgb_val_pred
        test_xgb += xgb_model.predict(dx_tst) / cfg.N_FOLDS
        xgb_models_list.append(xgb_model)

        auc_l = roc_auc_score(y_val, lgb_val_pred)
        auc_x = roc_auc_score(y_val, xgb_val_pred)
        print(f" LGBM: {auc_l:.5f} | XGB: {auc_x:.5f}")

    # --- Blend-weight grid search on the OOF predictions ---------------
    print("\n🔍 Optimizing blend weights...")
    best_auc, best_w = 0, 0.5
    for w in np.arange(0.0, 1.01, 0.01):
        auc = roc_auc_score(y, w * oof_lgbm + (1 - w) * oof_xgb)
        if auc > best_auc:
            best_auc, best_w = auc, w

    print(f"✅ Best weight → LGBM: {best_w:.2f}, XGB: {1-best_w:.2f}")
    print(f"🏆 Best OOF AUC: {best_auc:.5f}")

    oof_blend = best_w * oof_lgbm + (1 - best_w) * oof_xgb
    test_blend = best_w * test_lgbm + (1 - best_w) * test_xgb

    wandb.log({
        "final_oof_auc": best_auc,
        "lgbm_weight": best_w,
        "xgb_weight": 1 - best_w,
    })
    run.finish()

    return lgbm_models_list, xgb_models_list, oof_lgbm, oof_xgb, oof_blend, test_blend
834
+
835
+
836
# Fit the tuned final ensemble and unpack its artifacts.
ensemble_out = train_full_ensemble(X, y, X_test_final, best_lgbm_params, best_xgb_params, cfg)
(lgbm_models_final, xgb_models_final, oof_lgbm, oof_xgb, oof_blend, test_blend) = ensemble_out
839
+
840
+
841
# ── SHAP explainability on the first fold's LightGBM model ──────────
print("🔍 Computing SHAP values (TreeExplainer)...")

explainer = shap.TreeExplainer(lgbm_models_final[0])

# 2000-row random sample keeps SHAP computation fast.
# NOTE(review): np.random is not reseeded here, so the sample varies per run.
sample_idx = np.random.choice(len(X), min(2000, len(X)), replace=False)
X_sample = X.iloc[sample_idx]
shap_vals = explainer.shap_values(X_sample)

# Binary LightGBM may return [neg_class, pos_class]; keep the positive class.
if isinstance(shap_vals, list):
    shap_vals = shap_vals[1]

print(f"✅ SHAP values shape: {shap_vals.shape}")

# ── 1. Beeswarm / summary plot ──────────────────────────────────────
fig, ax = plt.subplots(figsize=(12, 10))
shap.summary_plot(shap_vals, X_sample, plot_type="dot", max_display=25, show=False)
plt.title("SHAP Beeswarm — Feature Impact on Credit Risk", fontsize=14, fontweight="bold")
plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/shap_beeswarm.png", dpi=150, bbox_inches="tight")
plt.show()

# ── 2. Bar plot (mean |SHAP|) ───────────────────────────────────────
fig, ax = plt.subplots(figsize=(12, 8))
shap.summary_plot(shap_vals, X_sample, plot_type="bar", max_display=20, show=False)
plt.title("Mean |SHAP| — Global Feature Importance", fontsize=14, fontweight="bold")
plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/shap_bar.png", dpi=150, bbox_inches="tight")
plt.show()

# ── 3. Dependence plots for the three strongest features ────────────
mean_abs_shap = np.abs(shap_vals).mean(0)
top3_features = (
    pd.DataFrame({"feature": X.columns, "mean_shap": mean_abs_shap})
    .nlargest(3, "mean_shap")["feature"]
    .tolist()
)

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, feat in zip(axes, top3_features):
    shap.dependence_plot(list(X.columns).index(feat), shap_vals, X_sample, ax=ax, show=False)
    ax.set_title(f"SHAP Dependence: {feat}", fontsize=10)
plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/shap_dependence.png", dpi=150, bbox_inches="tight")
plt.show()

# ── 4. Waterfall for the riskiest sampled applicant ─────────────────
most_risky_idx = np.argmax(oof_blend[sample_idx])
base_value = explainer.expected_value
if isinstance(base_value, list):
    base_value = base_value[1]
expl_obj = shap.Explanation(
    values=shap_vals[most_risky_idx],
    base_values=base_value,
    data=X_sample.iloc[most_risky_idx].values,
    feature_names=X_sample.columns.tolist(),
)
plt.figure(figsize=(14, 8))
shap.plots.waterfall(expl_obj, max_display=15, show=False)
plt.title("SHAP Waterfall — Most Risky Applicant", fontsize=13, fontweight="bold")
plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/shap_waterfall.png", dpi=150, bbox_inches="tight")
plt.show()

print("✅ All SHAP plots saved")
904
+
905
+
906
# ── Concept-drift stress test of the trained ensemble ───────────────
print("📉 Simulating Concept Drift...")

run = wandb.init(
    project=cfg.WANDB_PROJECT,
    name="concept-drift-simulation",
    tags=["drift", "simulation"],
)

# Validation split, index-aligned with X / y.
skf = StratifiedKFold(n_splits=2, shuffle=True, random_state=cfg.SEED)
trn_idx, val_idx = list(skf.split(X, y))[0]
y_val_drift = y.iloc[val_idx].values

# Baseline AUC from the ensemble OOF predictions (already computed over full X).
base_preds = oof_blend[val_idx]
base_auc = roc_auc_score(y_val_drift, base_preds)
print(f" Baseline AUC (no drift): {base_auc:.5f}")

# Top-30 importance features tell us WHICH columns to perturb.
top30 = feat_imp.head(30)["feature"].tolist()

# Each scenario: income multiplier, share of rows losing employment, label-flip rate.
drift_scenarios = {
    "Baseline (No Drift)": {"income_mult": 1.0, "emp_mask": 0.0, "label_noise": 0.0},
    "Mild Income Shock (-30%)": {"income_mult": 0.7, "emp_mask": 0.05, "label_noise": 0.02},
    "Severe Income Shock (-60%)": {"income_mult": 0.4, "emp_mask": 0.15, "label_noise": 0.05},
    "Mass Job Loss (20%)": {"income_mult": 0.5, "emp_mask": 0.20, "label_noise": 0.08},
    "Full Economic Shock": {"income_mult": 0.3, "emp_mask": 0.35, "label_noise": 0.12},
}

INCOME_FEATURES = ["AMT_INCOME_TOTAL", "INCOME_PER_PERSON", "CREDIT_INCOME_RATIO",
                   "ANNUITY_INCOME_RATIO", "EXT_CREDIT_RATIO"]

drift_results = []

for scenario_name, drift_cfg in drift_scenarios.items():
    # Always start from the FULL feature matrix so the trained model can score it.
    X_drifted = X.iloc[val_idx].copy().reset_index(drop=True)

    # Income shock applied to every income-derived column that exists.
    for col in [c for c in X_drifted.columns if c in INCOME_FEATURES]:
        X_drifted[col] *= drift_cfg["income_mult"]

    # Employment shock: zero employment columns for a random subset of rows.
    # NOTE(review): the RNG is unseeded here, so masks differ across runs — confirm intended.
    emp_cols = [c for c in X_drifted.columns if "EMPLOY" in c or "DAYS_EMPLOYED" in c]
    mask = np.random.random(len(X_drifted)) < drift_cfg["emp_mask"]
    for col in emp_cols:
        X_drifted.loc[mask, col] = 0

    # Flip a fraction of labels to emulate drift in the target itself.
    y_noisy = y_val_drift.copy()
    if drift_cfg["label_noise"] > 0:
        noise_n = max(1, int(drift_cfg["label_noise"] * len(y_noisy)))
        noise_idx = np.random.choice(len(y_noisy), noise_n, replace=False)
        y_noisy[noise_idx] = 1 - y_noisy[noise_idx]

    # Score the perturbed matrix with the first fold's full-feature model.
    drifted_preds = lgbm_models_final[0].predict(X_drifted)
    drifted_auc = roc_auc_score(y_noisy, drifted_preds)

    drift_results.append({
        "scenario": scenario_name,
        "auc": drifted_auc,
        "auc_drop": base_auc - drifted_auc,
        "income_mult": drift_cfg["income_mult"],
        "emp_mask": drift_cfg["emp_mask"],
    })
    wandb.log({"scenario": scenario_name, "drifted_auc": drifted_auc})
    print(f" {scenario_name:40s} | AUC: {drifted_auc:.5f} | Drop: {base_auc - drifted_auc:+.5f}")

# ── Visualise the degradation ───────────────────────────────────────
drift_df = pd.DataFrame(drift_results)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

axes[0].bar(range(len(drift_df)), drift_df["auc"],
            color=["#2196F3", "#4CAF50", "#FF9800", "#F44336", "#9C27B0"])
axes[0].axhline(y=0.7, color="red", linestyle="--", label="Min Acceptable AUC")
axes[0].set_xticks(range(len(drift_df)))
axes[0].set_xticklabels([s.split("(")[0].strip() for s in drift_df["scenario"]],
                        rotation=20, ha="right")
axes[0].set_ylabel("AUC")
axes[0].set_title("Model Performance Under Drift Scenarios")
axes[0].legend()

axes[1].plot(drift_df["income_mult"], drift_df["auc"],
             "o-", color="#F44336", linewidth=2, markersize=8)
axes[1].set_xlabel("Income Multiplier (1.0 = no drift)")
axes[1].set_ylabel("AUC")
axes[1].set_title("AUC Degradation vs Income Shock")
axes[1].fill_between(drift_df["income_mult"], drift_df["auc"], 0.5,
                     alpha=0.15, color="#F44336")

plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/drift_simulation.png", dpi=150, bbox_inches="tight")
plt.show()

run.finish()
print("✅ Drift simulation complete")
1005
+
1006
+
1007
from river import (
    drift as river_drift,
    linear_model as river_lm,
    preprocessing as river_pp,
    metrics as river_metrics,
    ensemble as river_ens,
    tree as river_tree,
    optim,
)
import time

print("🌊 Initialising River online learning pipeline...")


def _fresh_river_pipeline(grace_period):
    # StandardScaler → Hoeffding Adaptive Tree; rebuilt on every detected drift.
    return river_pp.StandardScaler() | river_tree.HoeffdingAdaptiveTreeClassifier(
        grace_period=grace_period,
        delta=1e-5,
        seed=cfg.SEED,
    )


river_pipeline = _fresh_river_pipeline(200)

# ADWIN watches the prediction-error stream for distribution shifts.
adwin = river_drift.ADWIN(delta=0.002)

# Streaming state.
river_auc = river_metrics.ROCAUC()
drift_points = []
retrain_count = 0
running_errors = []

# Stream only the top-30 features for speed; inject drift 60% of the way through.
X_stream = X[top30].fillna(0).reset_index(drop=True)
y_stream = y.reset_index(drop=True)
DRIFT_INJECT_AT = int(len(X_stream) * 0.6)
DRIFT_DURATION = 5000

print(f"🔄 Streaming {len(X_stream):,} samples...")
print(f" Drift will be injected at sample {DRIFT_INJECT_AT:,}")

start = time.time()

for i, (xi, yi) in enumerate(stream.iter_pandas(X_stream, y_stream)):
    # Synthetic drift window: scale income and add label noise.
    if DRIFT_INJECT_AT <= i < DRIFT_INJECT_AT + DRIFT_DURATION:
        xi["AMT_INCOME_TOTAL"] = xi.get("AMT_INCOME_TOTAL", 0) * cfg.DRIFT_INCOME_MULTIPLIER
        if np.random.random() < 0.12:
            yi = 1 - yi  # label noise

    # Test-then-train: predict, update the metric, feed the error to ADWIN.
    p1 = river_pipeline.predict_proba_one(xi).get(1, 0.5)
    river_auc.update(yi, p1)

    error = abs(yi - p1)
    running_errors.append(error)
    adwin.update(error)

    if adwin.drift_detected:
        drift_points.append(i)
        retrain_count += 1
        # Reset the learner with a shorter grace period for faster post-drift adaptation.
        river_pipeline = _fresh_river_pipeline(50)
        if retrain_count <= 5:
            print(f" 🚨 DRIFT DETECTED at sample {i:,} | Retrain #{retrain_count} | Running AUC: {river_auc.get():.4f}")

    river_pipeline.learn_one(xi, yi)

elapsed = time.time() - start
print(f"\n✅ Online learning complete in {elapsed:.1f}s")
print(f" Final AUC: {river_auc.get():.5f}")
print(f" Total drift detections: {len(drift_points)}")
print(f" Total retrains: {retrain_count}")

# ── Error stream + cumulative drift detections ──────────────────────
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8))

window = 500
smoothed = pd.Series(running_errors).rolling(window).mean()
ax1.plot(smoothed, color="#2196F3", linewidth=1, label=f"Error (rolling {window})")
for dp in drift_points:
    ax1.axvline(dp, color="red", linewidth=0.8, alpha=0.7)
ax1.axvline(DRIFT_INJECT_AT, color="orange", linewidth=2, linestyle="--", label="Drift Injected")
ax1.set_title("ADWIN Drift Detection — Error Stream", fontweight="bold")
ax1.set_xlabel("Sample")
ax1.set_ylabel("Prediction Error")
ax1.legend()

ax2.step(drift_points, range(1, len(drift_points) + 1), color="#F44336", linewidth=2)
ax2.axvline(DRIFT_INJECT_AT, color="orange", linewidth=2, linestyle="--", label="Drift Injected")
ax2.set_title("Cumulative Drift Detections", fontweight="bold")
ax2.set_xlabel("Sample Index")
ax2.set_ylabel("Cumulative Detections")
ax2.legend()

plt.tight_layout()
plt.savefig(f"{cfg.OUTPUT_DIR}/river_drift_detection.png", dpi=150, bbox_inches="tight")
plt.show()
1110
+
1111
+
1112
import pickle, joblib

# Persist fold models (LightGBM native text format, XGBoost JSON).
for i, model in enumerate(lgbm_models_final):
    model.save_model(f"{cfg.MODEL_DIR}/lgbm_fold_{i+1}.txt")
for i, model in enumerate(xgb_models_final):
    model.save_model(f"{cfg.MODEL_DIR}/xgb_fold_{i+1}.json")

# Persist preprocessing artifacts.
# NOTE(review): `scaler_drift` is not defined anywhere in this chunk — confirm it
# is created earlier in the notebook before relying on this save.
joblib.dump(pca, f"{cfg.MODEL_DIR}/pca.pkl")
joblib.dump(scaler_drift, f"{cfg.MODEL_DIR}/scaler.pkl")

# Persist the exact feature ordering expected at inference time.
with open(f"{cfg.MODEL_DIR}/feature_cols.json", "w") as f:
    json.dump(FEATURE_COLS, f)

print(f"✅ Models saved to {cfg.MODEL_DIR}")

# Kaggle submission file built from the blended test predictions.
submission = pd.DataFrame({
    "SK_ID_CURR": test["SK_ID_CURR"],
    "TARGET": test_blend,
})
submission.to_csv(f"{cfg.OUTPUT_DIR}/submission_ensemble.csv", index=False)
print(f"✅ Submission saved: {submission.shape}")
print(submission.head())

# Final W&B summary run collecting all headline metrics.
run = wandb.init(project=cfg.WANDB_PROJECT, name="final-summary", tags=["summary"])
wandb.log({
    "lgbm_baseline_auc": lgbm_auc,
    "lgbm_optuna_best": study_lgbm.best_value,
    "xgb_optuna_best": study_xgb.best_value,
    "ensemble_oof_auc": roc_auc_score(y, oof_blend),
    "drift_detections": len(drift_points),
    "total_features": len(FEATURE_COLS),
    "nlp_features": cfg.NLP_DIM_REDUCTION,
})
wandb.save(f"{cfg.OUTPUT_DIR}/submission_ensemble.csv")
run.finish()
print("✅ All done. W&B summary logged.")
1155
+
1156
+
1157
+
1158
+
1159
+
1160
+
1161
+
1162
+
1163
+
1164
+
1165
+
1166
+
1167
+
1168
+
1169
+
1170
+
1171
+
1172
+
1173
+
1174
+
1175
+
1176
+
1177
+
1178
+
1179
+
1180
+
1181
+
1182
+
1183
+
1184
+
1185
+
1186
+
1187
+
1188
+
1189
+
1190
+
1191
+
1192
+
1193
+
1194
+
1195
+
1196
+
kaggle_output/drift_simulation.png ADDED

Git LFS Details

  • SHA256: 00b80167ce74879cb82b32602a1307389fdf4599cec5796bd055bbf5c28fcad6
  • Pointer size: 131 Bytes
  • Size of remote file: 103 kB
kaggle_output/eda_overview.png ADDED

Git LFS Details

  • SHA256: 42c116ef1aa88413173e9ee74dc1cb46580ceee3049e58b1ff401c8c5db926fc
  • Pointer size: 131 Bytes
  • Size of remote file: 289 kB
kaggle_output/explainable-credit-risk-modeling-with-alternative.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [{"stream_name":"stderr","time":5.000587062,"data":"/usr/local/lib/python3.12/dist-packages/mistune.py:435: SyntaxWarning: invalid escape sequence '\\|'\n"}
2
+ ,{"stream_name":"stderr","time":5.000774502,"data":" cells[i][c] = re.sub('\\\\\\\\\\|', '|', cell)\n"}
3
+ ,{"stream_name":"stderr","time":5.657678067,"data":"/usr/local/lib/python3.12/dist-packages/nbconvert/filters/filter_links.py:36: SyntaxWarning: invalid escape sequence '\\_'\n"}
4
+ ,{"stream_name":"stderr","time":5.657724537,"data":" text = re.sub(r'_', '\\_', text) # Escape underscores in display text\n"}
5
+ ,{"stream_name":"stderr","time":7.211358548,"data":"[NbConvertApp] Converting notebook __notebook__.ipynb to html\n"}
6
+ ,{"stream_name":"stderr","time":9.655510019,"data":"[NbConvertApp] Support files will be in __results___files/\n"}
7
+ ,{"stream_name":"stderr","time":9.656032529,"data":"[NbConvertApp] Making directory __results___files\n"}
8
+ ,{"stream_name":"stderr","time":9.657319549,"data":"[NbConvertApp] Making directory __results___files\n"}
9
+ ,{"stream_name":"stderr","time":9.658246489,"data":"[NbConvertApp] Making directory __results___files\n"}
10
+ ,{"stream_name":"stderr","time":9.660020589,"data":"[NbConvertApp] Making directory __results___files\n"}
11
+ ,{"stream_name":"stderr","time":9.661195168999999,"data":"[NbConvertApp] Making directory __results___files\n"}
12
+ ,{"stream_name":"stderr","time":9.663283069,"data":"[NbConvertApp] Making directory __results___files\n"}
13
+ ,{"stream_name":"stderr","time":9.664512739,"data":"[NbConvertApp] Making directory __results___files\n"}
14
+ ,{"stream_name":"stderr","time":9.665452399,"data":"[NbConvertApp] Writing 558261 bytes to __results__.html\n"}
15
+ ]
kaggle_output/models/feature_cols.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["NAME_CONTRACT_TYPE", "CODE_GENDER", "FLAG_OWN_CAR", "FLAG_OWN_REALTY", "CNT_CHILDREN", "AMT_INCOME_TOTAL", "AMT_CREDIT", "AMT_ANNUITY", "AMT_GOODS_PRICE", "NAME_TYPE_SUITE", "NAME_INCOME_TYPE", "NAME_EDUCATION_TYPE", "NAME_FAMILY_STATUS", "NAME_HOUSING_TYPE", "REGION_POPULATION_RELATIVE", "DAYS_BIRTH", "DAYS_EMPLOYED", "DAYS_REGISTRATION", "DAYS_ID_PUBLISH", "OWN_CAR_AGE", "FLAG_MOBIL", "FLAG_EMP_PHONE", "FLAG_WORK_PHONE", "FLAG_CONT_MOBILE", "FLAG_PHONE", "FLAG_EMAIL", "OCCUPATION_TYPE", "CNT_FAM_MEMBERS", "REGION_RATING_CLIENT", "REGION_RATING_CLIENT_W_CITY", "WEEKDAY_APPR_PROCESS_START", "HOUR_APPR_PROCESS_START", "REG_REGION_NOT_LIVE_REGION", "REG_REGION_NOT_WORK_REGION", "LIVE_REGION_NOT_WORK_REGION", "REG_CITY_NOT_LIVE_CITY", "REG_CITY_NOT_WORK_CITY", "LIVE_CITY_NOT_WORK_CITY", "ORGANIZATION_TYPE", "EXT_SOURCE_1", "EXT_SOURCE_2", "EXT_SOURCE_3", "APARTMENTS_AVG", "BASEMENTAREA_AVG", "YEARS_BEGINEXPLUATATION_AVG", "YEARS_BUILD_AVG", "COMMONAREA_AVG", "ELEVATORS_AVG", "ENTRANCES_AVG", "FLOORSMAX_AVG", "FLOORSMIN_AVG", "LANDAREA_AVG", "LIVINGAPARTMENTS_AVG", "LIVINGAREA_AVG", "NONLIVINGAPARTMENTS_AVG", "NONLIVINGAREA_AVG", "APARTMENTS_MODE", "BASEMENTAREA_MODE", "YEARS_BEGINEXPLUATATION_MODE", "YEARS_BUILD_MODE", "COMMONAREA_MODE", "ELEVATORS_MODE", "ENTRANCES_MODE", "FLOORSMAX_MODE", "FLOORSMIN_MODE", "LANDAREA_MODE", "LIVINGAPARTMENTS_MODE", "LIVINGAREA_MODE", "NONLIVINGAPARTMENTS_MODE", "NONLIVINGAREA_MODE", "APARTMENTS_MEDI", "BASEMENTAREA_MEDI", "YEARS_BEGINEXPLUATATION_MEDI", "YEARS_BUILD_MEDI", "COMMONAREA_MEDI", "ELEVATORS_MEDI", "ENTRANCES_MEDI", "FLOORSMAX_MEDI", "FLOORSMIN_MEDI", "LANDAREA_MEDI", "LIVINGAPARTMENTS_MEDI", "LIVINGAREA_MEDI", "NONLIVINGAPARTMENTS_MEDI", "NONLIVINGAREA_MEDI", "FONDKAPREMONT_MODE", "HOUSETYPE_MODE", "TOTALAREA_MODE", "WALLSMATERIAL_MODE", "EMERGENCYSTATE_MODE", "OBS_30_CNT_SOCIAL_CIRCLE", "DEF_30_CNT_SOCIAL_CIRCLE", "OBS_60_CNT_SOCIAL_CIRCLE", "DEF_60_CNT_SOCIAL_CIRCLE", "DAYS_LAST_PHONE_CHANGE", "FLAG_DOCUMENT_2", 
"FLAG_DOCUMENT_3", "FLAG_DOCUMENT_4", "FLAG_DOCUMENT_5", "FLAG_DOCUMENT_6", "FLAG_DOCUMENT_7", "FLAG_DOCUMENT_8", "FLAG_DOCUMENT_9", "FLAG_DOCUMENT_10", "FLAG_DOCUMENT_11", "FLAG_DOCUMENT_12", "FLAG_DOCUMENT_13", "FLAG_DOCUMENT_14", "FLAG_DOCUMENT_15", "FLAG_DOCUMENT_16", "FLAG_DOCUMENT_17", "FLAG_DOCUMENT_18", "FLAG_DOCUMENT_19", "FLAG_DOCUMENT_20", "FLAG_DOCUMENT_21", "AMT_REQ_CREDIT_BUREAU_HOUR", "AMT_REQ_CREDIT_BUREAU_DAY", "AMT_REQ_CREDIT_BUREAU_WEEK", "AMT_REQ_CREDIT_BUREAU_MON", "AMT_REQ_CREDIT_BUREAU_QRT", "AMT_REQ_CREDIT_BUREAU_YEAR", "CREDIT_INCOME_RATIO", "ANNUITY_INCOME_RATIO", "CREDIT_TERM", "GOODS_CREDIT_RATIO", "AGE_YEARS", "EMPLOYMENT_YEARS", "EMPLOYED_RATIO", "CREDIT_TO_AGE", "INCOME_PER_PERSON", "CHILDREN_RATIO", "EXT_SOURCE_MEAN", "EXT_SOURCE_MIN", "EXT_SOURCE_PROD", "EXT_SOURCE_STD", "EXT1_EXT2_INTERACTION", "EXT2_EXT3_INTERACTION", "EXT_CREDIT_RATIO", "DOCUMENT_COUNT", "TOTAL_ENQUIRIES", "RECENT_ENQUIRY_RATIO", "HAS_CAR_REALTY", "DAYS_REGISTRATION_RATIO", "BUREAU_DAYS_CREDIT_MEAN", "BUREAU_DAYS_CREDIT_MIN", "BUREAU_DAYS_CREDIT_MAX", "BUREAU_DAYS_CREDIT_STD", "BUREAU_CREDIT_DAY_OVERDUE_MEAN", "BUREAU_CREDIT_DAY_OVERDUE_MAX", "BUREAU_CREDIT_DAY_OVERDUE_SUM", "BUREAU_DAYS_CREDIT_ENDDATE_MEAN", "BUREAU_DAYS_CREDIT_ENDDATE_MIN", "BUREAU_DAYS_CREDIT_ENDDATE_MAX", "BUREAU_AMT_CREDIT_SUM_MEAN", "BUREAU_AMT_CREDIT_SUM_MAX", "BUREAU_AMT_CREDIT_SUM_SUM", "BUREAU_AMT_CREDIT_SUM_DEBT_MEAN", "BUREAU_AMT_CREDIT_SUM_DEBT_MAX", "BUREAU_AMT_CREDIT_SUM_DEBT_SUM", "BUREAU_AMT_CREDIT_SUM_OVERDUE_MEAN", "BUREAU_AMT_CREDIT_SUM_OVERDUE_MAX", "BUREAU_AMT_CREDIT_SUM_OVERDUE_SUM", "BUREAU_DEBT_CREDIT_RATIO_MEAN", "BUREAU_DEBT_CREDIT_RATIO_MAX", "BUREAU_CREDIT_UTIL_RATE_MEAN", "BUREAU_CREDIT_UTIL_RATE_MAX", "BUREAU_CREDIT_ACTIVE_BINARY_MEAN", "BUREAU_CREDIT_ACTIVE_BINARY_SUM", "BUREAU_STATUS_WORST_MEAN", "BUREAU_STATUS_WORST_MAX", "BUREAU_STATUS_MEAN_MEAN", "BUREAU_MONTHS_COUNT_MEAN", "BUREAU_MONTHS_COUNT_SUM", "BUREAU_CNT_CREDIT_PROLONG_SUM", 
"BUREAU_CNT_CREDIT_PROLONG_MEAN", "BUREAU_COUNT", "BUREAU_ACTIVE_COUNT", "PREV_COUNT", "PREV_APPROVED_RATE", "PREV_REFUSED_RATE", "PREV_APP_CREDIT_RATIO_MEAN", "PREV_ANNUITY_MEAN", "PREV_CREDIT_MEAN", "PREV_DAYS_DECISION_MEAN", "PREV_DAYS_DECISION_MIN", "PREV_GOODS_PRICE_MEAN", "INST_PAYMENT_DIFF_MEAN", "INST_PAYMENT_DIFF_MAX", "INST_DAYS_ENTRY_DIFF_MEAN", "INST_LATE_PAYMENT_RATE", "INST_SHORT_PAYMENT_RATE", "INST_COUNT", "POS_MONTHS_COUNT", "POS_SK_DPD_MEAN", "POS_SK_DPD_MAX", "POS_DPD_RATE", "POS_CNT_INSTALMENT_MEAN", "CC_UTIL_RATE_MEAN", "CC_UTIL_RATE_MAX", "CC_DRAWING_RATE_MEAN", "CC_AMT_BALANCE_MEAN", "CC_COUNT", "CC_DPD_MEAN", "NLP_EMB_0", "NLP_EMB_1", "NLP_EMB_2", "NLP_EMB_3", "NLP_EMB_4", "NLP_EMB_5", "NLP_EMB_6", "NLP_EMB_7", "NLP_EMB_8", "NLP_EMB_9", "NLP_EMB_10", "NLP_EMB_11", "NLP_EMB_12", "NLP_EMB_13", "NLP_EMB_14", "NLP_EMB_15", "NLP_EMB_16", "NLP_EMB_17", "NLP_EMB_18", "NLP_EMB_19", "NLP_EMB_20", "NLP_EMB_21", "NLP_EMB_22", "NLP_EMB_23", "NLP_EMB_24", "NLP_EMB_25", "NLP_EMB_26", "NLP_EMB_27", "NLP_EMB_28", "NLP_EMB_29", "NLP_EMB_30", "NLP_EMB_31"]
kaggle_output/models/lgbm_fold_1.txt ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/lgbm_fold_2.txt ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/lgbm_fold_3.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0aa2a44a7ad2ae318c455318f0a14fd7cd88a74c3901eb15d259a7eebfd1acf
3
+ size 10491639
kaggle_output/models/lgbm_fold_4.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fbfdb76aa33054e98456dd5578d28bbc349e85bf6e8ffc2d121d69044667ad2
3
+ size 11224305
kaggle_output/models/lgbm_fold_5.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8fcd6417da7b9ca0c124315a36dabb3e571151e27c40dd84b354ef58c81a65
3
+ size 11612984
kaggle_output/models/pca.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211b22e835c628cec4a9ba9603bda0ebd974f58ba0fb3631ff1d606570d0d75b
3
+ size 52301
kaggle_output/models/scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b776bd1834cd9402f94f6ca4285fd7dbcf81f0fdbe31b4195cdf0c2a106593a
3
+ size 2199
kaggle_output/models/xgb_fold_1.json ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/xgb_fold_2.json ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/xgb_fold_3.json ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/xgb_fold_4.json ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/models/xgb_fold_5.json ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/river_drift_detection.png ADDED

Git LFS Details

  • SHA256: 5543829fb0e4f06100af6f03daf57c33e4f21fc25570809d0379beb1de8ebab1
  • Pointer size: 131 Bytes
  • Size of remote file: 158 kB
kaggle_output/shap_bar.png ADDED

Git LFS Details

  • SHA256: ecefc3d7c683cf35fe254d9e71a05b516e87e1533cf3856f7e8a7dbdec2153c6
  • Pointer size: 131 Bytes
  • Size of remote file: 136 kB
kaggle_output/shap_beeswarm.png ADDED

Git LFS Details

  • SHA256: 03dcb17ab7e4162ce65947d2268fe4b538cc59853b0ac82dea0a6098029c38e4
  • Pointer size: 131 Bytes
  • Size of remote file: 284 kB
kaggle_output/shap_dependence.png ADDED

Git LFS Details

  • SHA256: 612dc59a4801fff4d301af6027f6eeb9c2b7d1216902a3e551ec56e43a63033b
  • Pointer size: 131 Bytes
  • Size of remote file: 237 kB
kaggle_output/shap_waterfall.png ADDED

Git LFS Details

  • SHA256: 08c27dd9a91368d24aa49365902e648f00e77d602b9408ca29abaed6c153a5f6
  • Pointer size: 131 Bytes
  • Size of remote file: 155 kB
kaggle_output/submission_ensemble.csv ADDED
The diff for this file is too large to render. See raw diff
 
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/config.yaml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.25.0
4
+ e:
5
+ jyd6p8uwdd5m81jhhfteje7lwq5s32ua:
6
+ codePath: kaggle.ipynb
7
+ cpu_count: 2
8
+ cpu_count_logical: 4
9
+ cudaVersion: "13.0"
10
+ disk:
11
+ /:
12
+ total: "8656922775552"
13
+ used: "7347648929792"
14
+ email: suvraadeep@gmail.com
15
+ executable: /usr/bin/python3
16
+ gpu: Tesla T4
17
+ gpu_count: 2
18
+ gpu_nvidia:
19
+ - architecture: Turing
20
+ cudaCores: 2560
21
+ memoryTotal: "16106127360"
22
+ name: Tesla T4
23
+ uuid: GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52
24
+ - architecture: Turing
25
+ cudaCores: 2560
26
+ memoryTotal: "16106127360"
27
+ name: Tesla T4
28
+ uuid: GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156
29
+ host: 7a129c53b2b5
30
+ memory:
31
+ total: "33662472192"
32
+ os: Linux-6.6.113+-x86_64-with-glibc2.35
33
+ program: kaggle.ipynb
34
+ python: CPython 3.12.12
35
+ root: /kaggle/working
36
+ startedAt: "2026-03-31T06:55:27.783331Z"
37
+ writerId: jyd6p8uwdd5m81jhhfteje7lwq5s32ua
38
+ m: []
39
+ python_version: 3.12.12
40
+ t:
41
+ "1":
42
+ - 1
43
+ - 5
44
+ - 6
45
+ - 8
46
+ - 11
47
+ - 35
48
+ - 49
49
+ - 53
50
+ - 54
51
+ - 71
52
+ - 75
53
+ - 105
54
+ "2":
55
+ - 1
56
+ - 5
57
+ - 6
58
+ - 8
59
+ - 11
60
+ - 35
61
+ - 49
62
+ - 53
63
+ - 54
64
+ - 71
65
+ - 75
66
+ - 105
67
+ "3":
68
+ - 2
69
+ - 13
70
+ - 15
71
+ - 16
72
+ "4": 3.12.12
73
+ "5": 0.25.0
74
+ "6": 4.41.2
75
+ "8":
76
+ - 1
77
+ - 2
78
+ - 12
79
+ "12": 0.25.0
80
+ "13": linux-x86_64
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/media/table/feature_importance_6_9280c5e00d174ed85360.table.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"columns": ["feature", "importance"], "data": [["EXT_SOURCE_MEAN", 88957.16981611252], ["CREDIT_TERM", 11508.994362545014], ["GOODS_CREDIT_RATIO", 10540.86078901291], ["BUREAU_DEBT_CREDIT_RATIO_MAX", 10539.967162036895], ["EXT2_EXT3_INTERACTION", 8242.227200603485], ["POS_CNT_INSTALMENT_MEAN", 7895.31110868454], ["NLP_EMB_1", 6870.842185974121], ["EXT_SOURCE_MIN", 6497.98137922287], ["INST_LATE_PAYMENT_RATE", 6119.231726264954], ["POS_MONTHS_COUNT", 5955.207735443115], ["PREV_ANNUITY_MEAN", 5747.939696884156], ["AMT_ANNUITY", 5663.237604904175], ["EXT_SOURCE_3", 5611.383906459809], ["PREV_DAYS_DECISION_MIN", 5374.772545909882], ["EXT_SOURCE_STD", 5298.635174560547], ["BUREAU_DEBT_CREDIT_RATIO_MEAN", 5251.993024539947], ["PREV_REFUSED_RATE", 5157.323820114136], ["BUREAU_DAYS_CREDIT_MAX", 4958.93277130127], ["PREV_APP_CREDIT_RATIO_MEAN", 4918.679397964477], ["DAYS_ID_PUBLISH", 4889.688020515442]]}
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/output.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [200] valid_0's auc: 0.775678
2
+ Fold 1 | AUC: 0.77593 | Best iter: 169
3
+ [200] valid_0's auc: 0.784365
4
+ Fold 2 | AUC: 0.78506 | Best iter: 266
5
+ [200] valid_0's auc: 0.778904
6
+ Fold 3 | AUC: 0.77896 | Best iter: 206
7
+ [200] valid_0's auc: 0.784897
8
+ Fold 4 | AUC: 0.78566 | Best iter: 297
9
+ [200] valid_0's auc: 0.773547
10
+ Fold 5 | AUC: 0.77408 | Best iter: 227
11
+
12
+ 🏆 OOF AUC: 0.77992 ± 0.00470
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/requirements.txt ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==75.2.0
2
+ types-setuptools==80.10.0.20260124
3
+ requirements-parser==0.9.0
4
+ pip==24.1.2
5
+ logistro==2.0.1
6
+ tokenizers==0.19.1
7
+ huggingface_hub==0.36.2
8
+ scikit-learn==1.4.2
9
+ xgboost==2.0.3
10
+ sentence-transformers==2.7.0
11
+ choreographer==1.2.1
12
+ lightgbm==4.3.0
13
+ pytest-timeout==2.4.0
14
+ numpy==1.26.4
15
+ category-encoders==2.6.3
16
+ scipy==1.11.4
17
+ river==0.23.0
18
+ optuna-integration==4.8.0
19
+ kaleido==1.2.0
20
+ transformers==4.41.2
21
+ plotly==6.6.0
22
+ pytools==2025.2.5
23
+ pycuda==2026.1
24
+ siphash24==1.8
25
+ protobuf==5.29.5
26
+ torchtune==0.6.1
27
+ learntools==0.3.5
28
+ rouge_score==0.1.2
29
+ pyclipper==1.4.0
30
+ urwid_readline==0.15.1
31
+ h2o==3.46.0.10
32
+ rfc3161-client==1.0.5
33
+ blake3==1.0.8
34
+ mpld3==0.5.12
35
+ qgrid==1.3.1
36
+ ConfigSpace==1.2.2
37
+ woodwork==0.31.0
38
+ ujson==5.12.0
39
+ y-py==0.6.2
40
+ ipywidgets==8.1.5
41
+ scikit-multilearn==0.2.0
42
+ lightning-utilities==0.15.3
43
+ pytesseract==0.3.13
44
+ Cartopy==0.25.0
45
+ odfpy==1.4.1
46
+ Boruta==0.4.3
47
+ docstring-to-markdown==0.17
48
+ torchinfo==1.8.0
49
+ clint==0.5.1
50
+ comm==0.2.3
51
+ Deprecated==1.3.1
52
+ pymongo==4.16.0
53
+ tensorflow-io-gcs-filesystem==0.37.1
54
+ jmespath==1.1.0
55
+ pygltflib==1.16.5
56
+ keras-core==0.1.7
57
+ pandas==2.3.3
58
+ securesystemslib==1.3.1
59
+ ghapi==1.0.11
60
+ qtconsole==5.7.1
61
+ pyemd==2.0.0
62
+ pandas-profiling==3.6.6
63
+ nilearn==0.13.1
64
+ in-toto-attestation==0.9.3
65
+ a2a-sdk==0.3.25
66
+ keras-tuner==1.4.8
67
+ fastuuid==0.14.0
68
+ scikit-surprise==1.1.4
69
+ vtk==9.3.1
70
+ jupyter-ydoc==0.2.5
71
+ aiofiles==22.1.0
72
+ pytokens==0.4.1
73
+ featuretools==1.31.0
74
+ plotly-express==0.4.1
75
+ marshmallow==3.26.2
76
+ easyocr==1.7.2
77
+ ppft==1.7.8
78
+ openslide-bin==4.0.0.13
79
+ fuzzywuzzy==0.18.0
80
+ id==1.6.1
81
+ openslide-python==1.4.3
82
+ kaggle-environments==1.27.3
83
+ pyarrow==23.0.1
84
+ pandasql==0.7.3
85
+ update-checker==0.18.0
86
+ pathos==0.3.2
87
+ jupyter_server_fileid==0.9.3
88
+ fasttext==0.9.3
89
+ coverage==7.13.5
90
+ s3fs==2026.2.0
91
+ stopit==1.1.2
92
+ haversine==2.9.0
93
+ jupyter_server==2.12.5
94
+ geojson==3.2.0
95
+ botocore==1.42.70
96
+ fury==0.12.0
97
+ ipympl==0.10.0
98
+ ipython_pygments_lexers==1.1.1
99
+ olefile==0.47
100
+ jupyter_server_proxy==4.4.0
101
+ datasets==4.8.3
102
+ pytorch-ignite==0.5.3
103
+ xvfbwrapper==0.2.22
104
+ daal==2025.11.0
105
+ open_spiel==1.6.12
106
+ jupyter-lsp==1.5.1
107
+ trx-python==0.4.0
108
+ gpxpy==1.6.2
109
+ papermill==2.7.0
110
+ simpervisor==1.0.0
111
+ kagglehub==1.0.0
112
+ mlcrate==0.2.0
113
+ kaggle==2.0.0
114
+ dask-jobqueue==0.9.0
115
+ model-signing==1.1.1
116
+ jupyterlab==3.6.8
117
+ args==0.1.0
118
+ ImageHash==4.3.2
119
+ typing-inspect==0.9.0
120
+ PyUpSet==0.1.1.post7
121
+ dacite==1.9.2
122
+ pycryptodome==3.23.0
123
+ google-cloud-videointelligence==2.18.0
124
+ visions==0.8.1
125
+ deap==1.4.3
126
+ lml==0.2.0
127
+ jiter==0.10.0
128
+ ypy-websocket==0.8.4
129
+ cytoolz==1.1.0
130
+ path.py==12.5.0
131
+ tensorflow-io==0.37.1
132
+ wavio==0.0.9
133
+ pdf2image==1.17.0
134
+ line_profiler==5.0.2
135
+ fsspec==2026.2.0
136
+ aiobotocore==3.3.0
137
+ optuna==4.8.0
138
+ fastgit==0.0.4
139
+ litellm==1.82.4
140
+ pyLDAvis==3.4.1
141
+ Janome==0.5.0
142
+ langid==1.1.6
143
+ sigstore-models==0.0.6
144
+ pokerkit==0.6.3
145
+ pyaml==26.2.1
146
+ scikit-plot==0.3.7
147
+ nbdev==3.0.12
148
+ simpleitk==2.5.3
149
+ ml_collections==1.1.0
150
+ filetype==1.2.0
151
+ Wand==0.7.0
152
+ jupyter_server_ydoc==0.8.0
153
+ pyjson5==2.0.0
154
+ email-validator==2.3.0
155
+ execnb==0.1.18
156
+ colorama==0.4.6
157
+ ruamel.yaml==0.19.1
158
+ python-lsp-server==1.14.0
159
+ black==26.3.1
160
+ PyArabic==0.6.15
161
+ gymnasium==1.2.0
162
+ path==17.1.1
163
+ gensim==4.4.0
164
+ pypdf==6.9.1
165
+ TPOT==1.1.0
166
+ Pympler==1.1
167
+ bayesian-optimization==3.2.1
168
+ nbconvert==6.4.5
169
+ kornia==0.8.2
170
+ pathspec==1.0.4
171
+ pybind11==3.0.2
172
+ sigstore==4.2.0
173
+ funcy==2.0
174
+ func_timeout==4.3.5
175
+ testpath==0.6.0
176
+ aioitertools==0.13.0
177
+ google-cloud-vision==3.12.1
178
+ ray==2.54.0
179
+ kornia_rs==0.1.10
180
+ traitlets==5.14.3
181
+ gymnax==0.0.8
182
+ dnspython==2.8.0
183
+ chex==0.1.90
184
+ gym==0.26.2
185
+ nbclient==0.5.13
186
+ ydata-profiling==4.18.1
187
+ POT==0.9.6.post1
188
+ deepdiff==8.6.2
189
+ squarify==0.4.4
190
+ dataclasses-json==0.6.7
191
+ pettingzoo==1.24.0
192
+ pytorch-lightning==2.6.1
193
+ segment_anything==1.0
194
+ emoji==2.15.0
195
+ python-bidi==0.6.7
196
+ rgf-python==3.12.0
197
+ ninja==1.13.0
198
+ widgetsnbextension==4.0.15
199
+ minify_html==0.18.1
200
+ urwid==3.0.5
201
+ jedi==0.19.2
202
+ jupyterlab-lsp==3.10.2
203
+ python-lsp-jsonrpc==1.1.2
204
+ QtPy==2.4.3
205
+ pydicom==3.0.1
206
+ multimethod==1.12
207
+ torchmetrics==1.9.0
208
+ asttokens==3.0.1
209
+ docker==7.1.0
210
+ dask-expr==2.0.0
211
+ s3transfer==0.16.0
212
+ build==1.4.0
213
+ Shimmy==2.0.0
214
+ igraph==1.0.0
215
+ puremagic==2.1.0
216
+ jupyterlab_server==2.28.0
217
+ isoweek==1.3.3
218
+ texttable==1.7.0
219
+ kt-legacy==1.0.5
220
+ orderly-set==5.5.0
221
+ pyexcel-io==0.6.7
222
+ catboost==1.2.10
223
+ kagglesdk==0.1.16
224
+ mamba==0.11.3
225
+ dipy==1.12.0
226
+ colorlog==6.10.1
227
+ asn1crypto==1.5.1
228
+ pyexcel-ods==0.6.0
229
+ lime==0.2.0.1
230
+ pox==0.3.7
231
+ rfc8785==0.1.4
232
+ sigstore-rekor-types==0.0.18
233
+ cesium==0.12.4
234
+ boto3==1.42.70
235
+ tuf==6.0.0
236
+ hep_ml==0.8.0
237
+ pyproject_hooks==1.2.0
238
+ phik==0.12.5
239
+ pudb==2025.1.5
240
+ mne==1.11.0
241
+ keras-cv==0.9.0
242
+ dill==0.4.1
243
+ gatspy==0.3
244
+ scikit-learn-intelex==2025.11.0
245
+ onnx==1.20.1
246
+ scikit-optimize==0.10.2
247
+ mypy_extensions==1.1.0
248
+ mistune==0.8.4
249
+ json5==0.13.0
250
+ google-colab==1.0.0
251
+ psutil==5.9.5
252
+ jsonschema==4.26.0
253
+ astunparse==1.6.3
254
+ pycocotools==2.0.11
255
+ lxml==6.0.2
256
+ ipython==7.34.0
257
+ oauthlib==3.3.1
258
+ grpc-google-iam-v1==0.14.3
259
+ array_record==0.8.3
260
+ PuLP==3.3.0
261
+ nvidia-cuda-runtime-cu12==12.8.90
262
+ dask-cuda==26.2.0
263
+ immutabledict==4.3.1
264
+ peewee==4.0.0
265
+ fiona==1.10.1
266
+ aiosignal==1.4.0
267
+ libclang==18.1.1
268
+ annotated-types==0.7.0
269
+ spreg==1.8.5
270
+ grain==0.2.15
271
+ geemap==0.35.3
272
+ patsy==1.0.2
273
+ imagesize==1.4.1
274
+ py-cpuinfo==9.0.0
275
+ pyzmq==26.2.1
276
+ nvidia-cufile-cu12==1.13.1.3
277
+ multidict==6.7.1
278
+ srsly==2.5.2
279
+ intel-openmp==2025.3.2
280
+ uuid_utils==0.14.1
281
+ google-cloud-language==2.19.0
282
+ soxr==1.0.0
283
+ jupyterlab_pygments==0.3.0
284
+ backcall==0.2.0
285
+ tensorflow-hub==0.16.1
286
+ google==3.0.0
287
+ requests-oauthlib==2.0.0
288
+ dopamine_rl==4.1.2
289
+ overrides==7.7.0
290
+ db-dtypes==1.5.0
291
+ jeepney==0.9.0
292
+ langgraph-sdk==0.3.9
293
+ ipython-genutils==0.2.0
294
+ nvidia-cuda-cupti-cu12==12.8.90
295
+ libcugraph-cu12==26.2.0
296
+ catalogue==2.0.10
297
+ beautifulsoup4==4.13.5
298
+ nvidia-ml-py==13.590.48
299
+ sphinxcontrib-devhelp==2.0.0
300
+ partd==1.4.2
301
+ sklearn-pandas==2.2.0
302
+ sphinxcontrib-qthelp==2.0.0
303
+ google-cloud-spanner==3.63.0
304
+ h5py==3.15.1
305
+ python-box==7.4.1
306
+ distributed-ucxx-cu12==0.48.0
307
+ xlrd==2.0.2
308
+ branca==0.8.2
309
+ chardet==5.2.0
310
+ pycairo==1.29.0
311
+ Authlib==1.6.8
312
+ cuda-core==0.3.2
313
+ sentencepiece==0.2.1
314
+ nvidia-cusparselt-cu12==0.7.1
315
+ matplotlib-venn==1.1.2
316
+ scooby==0.11.0
317
+ fqdn==1.5.1
318
+ gin-config==0.5.0
319
+ ipython-sql==0.5.0
320
+ toml==0.10.2
321
+ PyOpenGL==3.1.10
322
+ weasel==0.4.3
323
+ jsonpointer==3.0.0
324
+ google-auth-httplib2==0.3.0
325
+ spint==1.0.7
326
+ nvtx==0.2.14
327
+ websocket-client==1.9.0
328
+ torchao==0.10.0
329
+ splot==1.1.7
330
+ langgraph-checkpoint==4.0.0
331
+ alabaster==1.0.0
332
+ jaxlib==0.7.2
333
+ google-resumable-media==2.8.0
334
+ namex==0.1.0
335
+ quantecon==0.11.0
336
+ nvidia-cuda-cccl-cu12==12.9.27
337
+ google-cloud-aiplatform==1.138.0
338
+ treelite==4.6.1
339
+ google-cloud-resource-manager==1.16.0
340
+ jupyter_core==5.9.1
341
+ spacy-legacy==3.0.12
342
+ librosa==0.11.0
343
+ ibis-framework==9.5.0
344
+ requests-toolbelt==1.0.0
345
+ smart_open==7.5.1
346
+ tensorflow-metadata==1.17.3
347
+ pysal==25.7
348
+ highspy==1.13.1
349
+ click==8.3.1
350
+ markdown-it-py==4.0.0
351
+ nvidia-cusolver-cu12==11.7.3.90
352
+ cupy-cuda12x==14.0.1
353
+ imutils==0.5.4
354
+ grpclib==0.4.9
355
+ opt_einsum==3.4.0
356
+ folium==0.20.0
357
+ moviepy==1.0.3
358
+ opencv-python==4.13.0.92
359
+ en_core_web_sm==3.8.0
360
+ tensorflow-text==2.19.0
361
+ langchain-core==1.2.15
362
+ yarl==1.22.0
363
+ spacy==3.8.11
364
+ importlib_resources==6.5.2
365
+ peft==0.18.1
366
+ lazy_loader==0.4
367
+ polars-runtime-32==1.35.2
368
+ pylibcudf-cu12==26.2.1
369
+ bigquery-magics==0.10.3
370
+ spanner-graph-notebook==1.1.8
371
+ sqlglot==25.20.2
372
+ linkify-it-py==2.0.3
373
+ types-pytz==2025.2.0.20251108
374
+ tifffile==2026.2.20
375
+ tsfresh==0.21.1
376
+ nbclassic==1.3.3
377
+ scikit-image==0.25.2
378
+ tensorflow_decision_forests==1.12.0
379
+ simsimd==6.5.13
380
+ isoduration==20.11.0
381
+ momepy==0.11.0
382
+ pytest==8.4.2
383
+ nvidia-cuda-nvcc-cu12==12.5.82
384
+ cuda-bindings==12.9.4
385
+ torchsummary==1.5.1
386
+ earthengine-api==1.5.24
387
+ webencodings==0.5.1
388
+ optree==0.19.0
389
+ jax-cuda12-pjrt==0.7.2
390
+ langchain==1.2.10
391
+ safehttpx==0.1.7
392
+ holidays==0.91
393
+ google-cloud-firestore==2.23.0
394
+ fastjsonschema==2.21.2
395
+ pymc==5.28.0
396
+ pydantic==2.12.3
397
+ jaraco.context==6.1.0
398
+ pyogrio==0.12.1
399
+ numba-cuda==0.22.2
400
+ fonttools==4.61.1
401
+ httpimport==1.4.1
402
+ rsa==4.9.1
403
+ tomlkit==0.13.3
404
+ entrypoints==0.4
405
+ anyio==4.12.1
406
+ charset-normalizer==3.4.4
407
+ pooch==1.9.0
408
+ libcuml-cu12==26.2.0
409
+ astropy-iers-data==0.2026.2.23.0.48.33
410
+ ipyleaflet==0.20.0
411
+ cryptography==43.0.3
412
+ missingno==0.5.2
413
+ langgraph==1.0.9
414
+ pandas-datareader==0.10.0
415
+ pyviz_comms==3.0.6
416
+ cycler==0.12.1
417
+ tensorboard==2.19.0
418
+ gast==0.7.0
419
+ jax-cuda12-plugin==0.7.2
420
+ platformdirs==4.9.2
421
+ google-genai==1.64.0
422
+ inflect==7.5.0
423
+ httplib2==0.31.2
424
+ h11==0.16.0
425
+ alembic==1.18.4
426
+ multitasking==0.0.12
427
+ rmm-cu12==26.2.0
428
+ cvxpy==1.6.7
429
+ affine==2.4.0
430
+ cuml-cu12==26.2.0
431
+ pyparsing==3.3.2
432
+ cffi==2.0.0
433
+ h5netcdf==1.8.1
434
+ Markdown==3.10.2
435
+ google-cloud-translate==3.24.0
436
+ rpy2==3.5.17
437
+ regex==2025.11.3
438
+ tf_keras==2.19.0
439
+ google-auth==2.47.0
440
+ nvidia-libnvcomp-cu12==5.1.0.21
441
+ Send2Trash==2.1.0
442
+ cymem==2.0.13
443
+ pylibraft-cu12==26.2.0
444
+ shap==0.50.0
445
+ shapely==2.1.2
446
+ psygnal==0.15.1
447
+ uri-template==1.3.0
448
+ parso==0.8.6
449
+ webcolors==25.10.0
450
+ nltk==3.9.1
451
+ atpublic==5.1
452
+ ImageIO==2.37.2
453
+ sphinxcontrib-applehelp==2.0.0
454
+ bigframes==2.35.0
455
+ pydot==4.0.1
456
+ onemkl-license==2025.3.1
457
+ treescope==0.1.10
458
+ tcmlib==1.4.1
459
+ opentelemetry-sdk==1.38.0
460
+ tiktoken==0.12.0
461
+ nibabel==5.3.3
462
+ multiprocess==0.70.16
463
+ typing_extensions==4.15.0
464
+ PyYAML==6.0.3
465
+ defusedxml==0.7.1
466
+ sphinxcontrib-serializinghtml==2.0.0
467
+ bleach==6.3.0
468
+ tenacity==9.1.4
469
+ python-utils==3.9.1
470
+ google-cloud-bigquery==3.40.1
471
+ google-cloud-bigquery-connection==1.20.0
472
+ opentelemetry-resourcedetector-gcp==1.11.0a0
473
+ ormsgpack==1.12.2
474
+ pydotplus==2.0.2
475
+ pycryptodomex==3.23.0
476
+ openai==2.23.0
477
+ matplotlib==3.10.0
478
+ ml_dtypes==0.5.4
479
+ uvloop==0.22.1
480
+ google-pasta==0.2.0
481
+ giddy==2.3.8
482
+ ipyparallel==8.8.0
483
+ keras==3.10.0
484
+ cuvs-cu12==26.2.0
485
+ mcp==1.26.0
486
+ spacy-loggers==1.0.5
487
+ google-cloud-logging==3.13.0
488
+ rfc3987-syntax==1.1.0
489
+ google-ai-generativelanguage==0.6.15
490
+ keras-hub==0.21.1
491
+ pydata-google-auth==1.9.1
492
+ absl-py==1.4.0
493
+ ydf==0.15.0
494
+ narwhals==2.17.0
495
+ nvidia-cusparse-cu12==12.5.8.93
496
+ openpyxl==3.1.5
497
+ nvidia-cublas-cu12==12.8.4.1
498
+ roman-numerals==4.1.0
499
+ vega-datasets==0.9.0
500
+ mpmath==1.3.0
501
+ etils==1.13.0
502
+ osqp==1.1.1
503
+ traittypes==0.2.3
504
+ opentelemetry-exporter-gcp-monitoring==1.11.0a0
505
+ graphviz==0.21
506
+ google-cloud-trace==1.18.0
507
+ einops==0.8.2
508
+ torchdata==0.11.0
509
+ jax==0.7.2
510
+ cachetools==6.2.6
511
+ aiohappyeyeballs==2.6.1
512
+ annotated-doc==0.0.4
513
+ starlette==0.52.1
514
+ fastapi==0.133.0
515
+ typer==0.24.1
516
+ duckdb==1.3.2
517
+ blinker==1.9.0
518
+ referencing==0.37.0
519
+ googledrivedownloader==1.1.0
520
+ GDAL==3.8.4
521
+ cuda-python==12.9.4
522
+ pycparser==3.0
523
+ et_xmlfile==2.0.0
524
+ jieba==0.42.1
525
+ zict==3.0.0
526
+ hyperopt==0.2.7
527
+ python-louvain==0.16
528
+ SQLAlchemy==2.0.47
529
+ cuda-toolkit==12.8.1
530
+ PyDrive2==1.21.3
531
+ roman-numerals-py==4.1.0
532
+ urllib3==2.5.0
533
+ jaraco.functools==4.4.0
534
+ optax==0.2.7
535
+ pyOpenSSL==24.2.1
536
+ jupyter-console==6.6.3
537
+ libkvikio-cu12==26.2.0
538
+ gspread==6.2.1
539
+ docstring_parser==0.17.0
540
+ albumentations==2.0.8
541
+ jupytext==1.19.1
542
+ seaborn==0.13.2
543
+ librmm-cu12==26.2.0
544
+ cons==0.4.7
545
+ matplotlib-inline==0.2.1
546
+ pynndescent==0.6.0
547
+ stringzilla==4.6.0
548
+ flatbuffers==25.12.19
549
+ omegaconf==2.3.0
550
+ umap-learn==0.5.11
551
+ progressbar2==4.5.0
552
+ pexpect==4.9.0
553
+ torchcodec==0.10.0+cu128
554
+ ptyprocess==0.7.0
555
+ pygame==2.6.1
556
+ kiwisolver==1.4.9
557
+ Cython==3.0.12
558
+ shellingham==1.5.4
559
+ soupsieve==2.8.3
560
+ snowballstemmer==3.0.1
561
+ propcache==0.4.1
562
+ ucxx-cu12==0.48.0
563
+ nbformat==5.10.4
564
+ python-snappy==0.7.3
565
+ rasterstats==0.20.0
566
+ bqplot==0.12.45
567
+ nest-asyncio==1.6.0
568
+ opencv-python-headless==4.13.0.92
569
+ notebook==6.5.7
570
+ flax==0.11.2
571
+ google-cloud-functions==1.22.0
572
+ multipledispatch==1.0.0
573
+ googleapis-common-protos==1.72.0
574
+ eerepr==0.1.2
575
+ torchaudio==2.10.0+cu128
576
+ locket==1.0.0
577
+ prettytable==3.17.0
578
+ pygit2==1.19.1
579
+ fastai==2.8.7
580
+ msgpack==1.1.2
581
+ clarabel==0.11.1
582
+ cligj==0.7.2
583
+ google-cloud-secret-manager==2.26.0
584
+ spglm==1.1.0
585
+ ipytree==0.2.2
586
+ termcolor==3.3.0
587
+ tweepy==4.16.0
588
+ google-cloud-core==2.5.0
589
+ dataproc-spark-connect==1.0.2
590
+ mkl==2025.3.1
591
+ umf==1.0.3
592
+ textblob==0.19.0
593
+ firebase-admin==6.9.0
594
+ simple-parsing==0.1.8
595
+ debugpy==1.8.15
596
+ google-cloud-discoveryengine==0.13.12
597
+ fastcore==1.12.16
598
+ decorator==4.4.2
599
+ pickleshare==0.7.5
600
+ rasterio==1.5.0
601
+ networkx==3.6.1
602
+ typer-slim==0.24.0
603
+ wasabi==1.1.3
604
+ mgwr==2.2.1
605
+ hdbscan==0.8.41
606
+ pydub==0.25.1
607
+ tobler==0.13.0
608
+ more-itertools==10.8.0
609
+ keyrings.google-artifactregistry-auth==1.1.2
610
+ cloudpickle==3.1.2
611
+ nvidia-nvtx-cu12==12.8.90
612
+ fastlite==0.2.4
613
+ colorcet==3.1.0
614
+ lark==1.3.1
615
+ antlr4-python3-runtime==4.9.3
616
+ keras-nlp==0.21.1
617
+ music21==9.9.1
618
+ Pygments==2.19.2
619
+ triton==3.6.0
620
+ toolz==0.12.1
621
+ python-slugify==8.0.4
622
+ sqlparse==0.5.5
623
+ jupyter-leaflet==0.20.0
624
+ gym-notices==0.1.0
625
+ torchvision==0.25.0+cu128
626
+ prophet==1.3.0
627
+ google-cloud-datastore==2.23.0
628
+ semantic-version==2.10.0
629
+ fastprogress==1.1.5
630
+ etuples==0.3.10
631
+ pyspark==4.0.2
632
+ orjson==3.11.7
633
+ terminado==0.18.1
634
+ accelerate==1.12.0
635
+ panel==1.8.7
636
+ apswutils==0.1.2
637
+ pyproj==3.7.2
638
+ sphinxcontrib-htmlhelp==2.1.0
639
+ certifi==2026.1.4
640
+ grpc-interceptor==0.15.4
641
+ pyasn1==0.6.2
642
+ geocoder==1.38.1
643
+ idna==3.11
644
+ mizani==0.13.5
645
+ jupyter_server_terminals==0.5.4
646
+ httpcore==1.0.9
647
+ pyasn1_modules==0.4.2
648
+ ffmpy==1.0.0
649
+ pyperclip==1.11.0
650
+ safetensors==0.7.0
651
+ ndindex==1.10.1
652
+ tblib==3.2.2
653
+ docutils==0.21.2
654
+ scs==3.2.11
655
+ distro==1.9.0
656
+ tf-slim==1.1.0
657
+ babel==2.18.0
658
+ google-cloud-pubsub==2.35.0
659
+ google-api-python-client==2.190.0
660
+ tzlocal==5.3.1
661
+ groovy==0.1.2
662
+ plum-dispatch==2.7.1
663
+ dask==2026.1.1
664
+ blosc2==4.0.0
665
+ sqlalchemy-spanner==1.17.2
666
+ orbax-checkpoint==0.11.33
667
+ wandb==0.25.0
668
+ geopandas==1.1.2
669
+ proglog==0.1.12
670
+ python-dateutil==2.9.0.post0
671
+ tzdata==2025.3
672
+ editdistance==0.8.1
673
+ langsmith==0.7.6
674
+ xarray-einstats==0.10.0
675
+ pydantic_core==2.41.4
676
+ tabulate==0.9.0
677
+ mmh3==5.2.0
678
+ sentry-sdk==2.53.0
679
+ spopt==0.7.0
680
+ dlib==19.24.6
681
+ community==1.0.0b1
682
+ tensorflow==2.19.0
683
+ ale-py==0.11.2
684
+ murmurhash==1.0.15
685
+ notebook_shim==0.2.4
686
+ mdurl==0.1.2
687
+ diffusers==0.36.0
688
+ requests==2.32.4
689
+ Flask==3.1.3
690
+ prometheus_client==0.24.1
691
+ uvicorn==0.41.0
692
+ logical-unification==0.4.7
693
+ soundfile==0.13.1
694
+ itsdangerous==2.2.0
695
+ jsonpatch==1.33
696
+ plotnine==0.14.5
697
+ distributed==2026.1.1
698
+ google-auth-oauthlib==1.2.4
699
+ gdown==5.2.1
700
+ brotli==1.2.0
701
+ py4j==0.10.9.9
702
+ pytensor==2.38.0
703
+ text-unidecode==1.3
704
+ yfinance==0.2.66
705
+ arviz==0.22.0
706
+ cudf-cu12==26.2.1
707
+ wordcloud==1.9.6
708
+ jaraco.classes==3.4.0
709
+ albucore==0.0.24
710
+ python-dotenv==1.2.1
711
+ uritemplate==4.2.0
712
+ nx-cugraph-cu12==26.2.0
713
+ raft-dask-cu12==26.2.0
714
+ hpack==4.1.0
715
+ numexpr==2.14.1
716
+ pydantic-settings==2.13.1
717
+ rapids-logger==0.2.3
718
+ cmake==3.31.10
719
+ pillow==11.3.0
720
+ jsonschema-specifications==2025.9.1
721
+ tables==3.10.2
722
+ google-cloud-storage==3.9.0
723
+ mapclassify==2.10.0
724
+ altair==5.5.0
725
+ filelock==3.24.3
726
+ google-cloud-appengine-logging==1.8.0
727
+ cufflinks==0.17.3
728
+ cvxopt==1.3.2
729
+ six==1.17.0
730
+ watchdog==6.0.0
731
+ sse-starlette==3.2.0
732
+ PySocks==1.7.1
733
+ jupyterlab_widgets==3.0.16
734
+ spaghetti==1.7.6
735
+ intel-cmplr-lib-ur==2025.3.2
736
+ uc-micro-py==1.0.3
737
+ Sphinx==8.2.3
738
+ PyJWT==2.11.0
739
+ google-cloud-bigtable==2.35.0
740
+ numba==0.60.0
741
+ httptools==0.7.1
742
+ rich==13.9.4
743
+ pointpats==2.5.5
744
+ watchfiles==1.1.1
745
+ promise==2.3
746
+ polars==1.35.2
747
+ greenlet==3.3.2
748
+ rfc3986-validator==0.1.1
749
+ threadpoolctl==3.6.0
750
+ opentelemetry-exporter-otlp-proto-http==1.38.0
751
+ libcuvs-cu12==26.2.0
752
+ sniffio==1.3.1
753
+ pylibcugraph-cu12==26.2.0
754
+ holoviews==1.22.1
755
+ pandas-gbq==0.30.0
756
+ frozenlist==1.8.0
757
+ google-crc32c==1.8.0
758
+ torch==2.10.0+cu128
759
+ ipyevents==2.0.4
760
+ libucxx-cu12==0.48.0
761
+ cramjam==2.11.0
762
+ opentelemetry-exporter-otlp-proto-common==1.38.0
763
+ wurlitzer==3.1.1
764
+ confection==0.1.5
765
+ stanio==0.5.1
766
+ easydict==1.13
767
+ argon2-cffi==25.1.0
768
+ llvmlite==0.43.0
769
+ humanize==4.15.0
770
+ rapids-dask-dependency==26.2.0
771
+ argon2-cffi-bindings==25.1.0
772
+ future==1.0.0
773
+ rpds-py==0.30.0
774
+ psycopg2==2.9.11
775
+ iniconfig==2.3.0
776
+ jupyter-events==0.12.0
777
+ nvidia-nccl-cu12==2.27.5
778
+ GitPython==3.1.46
779
+ joblib==1.5.3
780
+ beartype==0.22.9
781
+ hf-xet==1.3.0
782
+ Bottleneck==1.4.2
783
+ apsw==3.51.2.0
784
+ bokeh==3.8.2
785
+ google-cloud-dataproc==5.25.0
786
+ nvidia-cuda-nvrtc-cu12==12.8.93
787
+ colour==0.1.5
788
+ zipp==3.23.0
789
+ blis==1.3.3
790
+ click-plugins==1.1.1.2
791
+ httpx-sse==0.4.3
792
+ nvidia-nvshmem-cu12==3.4.5
793
+ sphinxcontrib-jsmath==1.0.1
794
+ prompt_toolkit==3.0.52
795
+ esda==2.8.1
796
+ param==2.3.2
797
+ google-cloud-speech==2.36.1
798
+ portpicker==1.5.2
799
+ PyWavelets==1.9.0
800
+ google-cloud-monitoring==2.29.1
801
+ Farama-Notifications==0.0.4
802
+ pytz==2025.2
803
+ MarkupSafe==3.0.3
804
+ pyomo==6.10.0
805
+ packaging==26.0
806
+ betterproto==2.0.0b6
807
+ libraft-cu12==26.2.0
808
+ typeguard==4.5.1
809
+ imbalanced-learn==0.14.1
810
+ google-adk==1.25.1
811
+ CacheControl==0.14.4
812
+ ipykernel==6.17.1
813
+ jsonpickle==4.1.1
814
+ xyzservices==2025.11.0
815
+ websockets==15.0.1
816
+ PyGObject==3.48.2
817
+ pandas-stubs==2.2.2.240909
818
+ proto-plus==1.27.1
819
+ segregation==2.5.3
820
+ ratelim==0.1.6
821
+ miniKanren==1.0.5
822
+ geographiclib==2.1
823
+ Jinja2==3.1.6
824
+ frozendict==2.4.7
825
+ libcudf-cu12==26.2.1
826
+ nvidia-cufft-cu12==11.3.3.83
827
+ typing-inspection==0.4.2
828
+ gradio_client==1.14.0
829
+ simplejson==3.20.2
830
+ ruff==0.15.2
831
+ imageio-ffmpeg==0.6.0
832
+ python-json-logger==4.0.0
833
+ cucim-cu12==26.2.0
834
+ jupyter_kernel_gateway==2.5.2
835
+ contourpy==1.3.3
836
+ google-api-core==2.30.0
837
+ opencv-contrib-python==4.13.0.92
838
+ nvidia-cudnn-cu12==9.10.2.21
839
+ opentelemetry-proto==1.38.0
840
+ dask-cudf-cu12==26.2.1
841
+ nvidia-nvimgcodec-cu12==0.7.0.11
842
+ statsmodels==0.14.6
843
+ opentelemetry-exporter-gcp-trace==1.11.0
844
+ deprecation==2.1.0
845
+ tinycss2==1.4.0
846
+ mdit-py-plugins==0.5.0
847
+ tensorflow-datasets==4.9.9
848
+ opentelemetry-api==1.38.0
849
+ langgraph-prebuilt==1.0.8
850
+ keyring==25.7.0
851
+ inequality==1.1.2
852
+ cyipopt==1.5.0
853
+ sympy==1.14.0
854
+ oauth2client==4.1.3
855
+ python-fasthtml==0.12.47
856
+ gspread-dataframe==4.0.0
857
+ wcwidth==0.6.0
858
+ geopy==2.4.1
859
+ natsort==8.4.0
860
+ timm==1.0.25
861
+ rfc3339-validator==0.1.4
862
+ stumpy==1.13.0
863
+ parsy==2.2
864
+ libucx-cu12==1.19.0
865
+ pyerfa==2.0.1.5
866
+ astropy==7.2.0
867
+ curl_cffi==0.14.0
868
+ xarray==2025.12.0
869
+ preshed==3.0.12
870
+ Werkzeug==3.1.6
871
+ SecretStorage==3.5.0
872
+ grpcio==1.78.1
873
+ slicer==0.0.8
874
+ cudf-polars-cu12==26.2.1
875
+ aiosqlite==0.22.1
876
+ grpcio-status==1.71.2
877
+ libpysal==4.14.1
878
+ gitdb==4.0.12
879
+ hyperframe==6.1.0
880
+ opentelemetry-semantic-conventions==0.59b0
881
+ wheel==0.46.3
882
+ h2==4.3.0
883
+ google-cloud-audit-log==0.4.0
884
+ tqdm==4.67.3
885
+ httpx==0.28.1
886
+ cloudpathlib==0.23.0
887
+ thinc==8.3.10
888
+ audioread==3.1.0
889
+ fastdownload==0.0.7
890
+ gcsfs==2025.3.0
891
+ nvidia-nvjitlink-cu12==12.8.93
892
+ access==1.1.10.post3
893
+ tornado==6.5.1
894
+ pandocfilters==1.5.1
895
+ fasttransform==0.0.2
896
+ nvidia-curand-cu12==10.3.9.90
897
+ python-multipart==0.0.22
898
+ yellowbrick==1.5
899
+ jupyter_client==7.4.9
900
+ google-generativeai==0.8.6
901
+ blobfile==3.2.0
902
+ importlib_metadata==8.7.1
903
+ tensorboard-data-server==0.7.2
904
+ attrs==25.4.0
905
+ tbb==2022.3.1
906
+ pluggy==1.6.0
907
+ cuda-pathfinder==1.3.5
908
+ rtree==1.4.1
909
+ arrow==1.4.0
910
+ wrapt==2.1.1
911
+ anywidget==0.9.21
912
+ mlxtend==0.23.4
913
+ smmap==5.0.2
914
+ aiohttp==3.13.3
915
+ opentelemetry-exporter-gcp-logging==1.11.0a0
916
+ sortedcontainers==2.4.0
917
+ pyshp==3.0.3
918
+ sklearn-compat==0.1.5
919
+ xxhash==3.6.0
920
+ zstandard==0.25.0
921
+ Mako==1.3.10
922
+ google-cloud-iam==2.21.0
923
+ autograd==1.8.0
924
+ glob2==0.7
925
+ tensorstore==0.1.81
926
+ tensorflow-probability==0.25.0
927
+ colorlover==0.3.0
928
+ ipyfilechooser==0.6.0
929
+ gradio==5.50.0
930
+ cmdstanpy==1.3.0
931
+ dm-tree==0.1.9
932
+ html5lib==1.1
933
+ python-apt==0.0.0
934
+ PyGObject==3.42.1
935
+ blinker==1.4
936
+ jeepney==0.7.1
937
+ six==1.16.0
938
+ oauthlib==3.2.0
939
+ wadllib==1.3.6
940
+ launchpadlib==1.10.16
941
+ dbus-python==1.2.18
942
+ PyJWT==2.3.0
943
+ importlib-metadata==4.6.4
944
+ httplib2==0.20.2
945
+ zipp==1.0.0
946
+ pyparsing==2.4.7
947
+ lazr.restfulclient==0.14.4
948
+ SecretStorage==3.3.1
949
+ distro==1.7.0
950
+ lazr.uri==1.0.6
951
+ more-itertools==8.10.0
952
+ python-apt==2.4.0+ubuntu4.1
953
+ cryptography==3.4.8
954
+ keyring==23.5.0
955
+ Markdown==3.3.6
956
+ Mako==1.1.3
957
+ MarkupSafe==2.0.1
958
+ packaging==24.1
959
+ inflect==7.3.1
960
+ autocommand==2.2.2
961
+ typeguard==4.3.0
962
+ jaraco.text==3.12.1
963
+ importlib_resources==6.4.0
964
+ wheel==0.43.0
965
+ zipp==3.19.2
966
+ platformdirs==4.2.2
967
+ importlib_metadata==8.0.0
968
+ tomli==2.0.1
969
+ jaraco.collections==5.1.0
970
+ more-itertools==10.3.0
971
+ typing_extensions==4.12.2
972
+ backports.tarfile==1.2.0
973
+ jaraco.functools==4.0.1
974
+ jaraco.context==5.3.0
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/wandb-metadata.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.6.113+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.12.12",
4
+ "startedAt": "2026-03-31T06:55:27.783331Z",
5
+ "program": "kaggle.ipynb",
6
+ "codePath": "kaggle.ipynb",
7
+ "email": "suvraadeep@gmail.com",
8
+ "root": "/kaggle/working",
9
+ "host": "7a129c53b2b5",
10
+ "executable": "/usr/bin/python3",
11
+ "cpu_count": 2,
12
+ "cpu_count_logical": 4,
13
+ "gpu": "Tesla T4",
14
+ "gpu_count": 2,
15
+ "disk": {
16
+ "/": {
17
+ "total": "8656922775552",
18
+ "used": "7347648929792"
19
+ }
20
+ },
21
+ "memory": {
22
+ "total": "33662472192"
23
+ },
24
+ "gpu_nvidia": [
25
+ {
26
+ "name": "Tesla T4",
27
+ "memoryTotal": "16106127360",
28
+ "cudaCores": 2560,
29
+ "architecture": "Turing",
30
+ "uuid": "GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52"
31
+ },
32
+ {
33
+ "name": "Tesla T4",
34
+ "memoryTotal": "16106127360",
35
+ "cudaCores": 2560,
36
+ "architecture": "Turing",
37
+ "uuid": "GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156"
38
+ }
39
+ ],
40
+ "cudaVersion": "13.0",
41
+ "writerId": "jyd6p8uwdd5m81jhhfteje7lwq5s32ua"
42
+ }
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"n_features":234,"_step":6,"fold_4_auc":0.785662839195165,"n_train":307511,"fold_1_auc":0.7759344239846245,"fold_5_auc":0.7740783393085308,"_runtime":340,"fold_std":0.004696875321178463,"feature_importance":{"log_mode":"IMMUTABLE","sha256":"9280c5e00d174ed85360bfef885ef1bdd68e2abd997bb9ce86ab5bad73c62e80","ncols":2,"nrows":20,"_type":"table-file","size":909,"artifact_path":"wandb-client-artifact://q52w3a226nb82x86h1fbmlu6pk4a1pfhhgo1ypln5p6f3m1jo0y139ufnfpw64fhzjscu59hx5ez5aidin3iwxikjs3mxvvucbscl5amw5sab6jnh1njmqdzb1qyswkv/feature_importance.table.json","_latest_artifact_path":"wandb-client-artifact://0qa79omgcechd1vsci896g88gfa40010jtogetx60ek4susdjv3aqe995rhq8ol14qba5im486ruer71vxou9p08yyebvgx3y8lwysph1yelxvlnmy0safqn6dtjvl31:latest/feature_importance.table.json","path":"media/table/feature_importance_6_9280c5e00d174ed85360.table.json"},"oof_auc":0.7799152308164182,"fold_3_auc":0.7789614162608197,"fold":5,"fold_2_auc":0.7850577118521481,"_wandb":{"runtime":340},"_timestamp":1.77494047535925e+09}
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-03-31T06:55:28.579493541Z","level":"INFO","msg":"stream: starting","core version":"0.25.0"}
2
+ {"time":"2026-03-31T06:55:28.73430316Z","level":"INFO","msg":"stream: created new stream","id":"jxzjz5r3"}
3
+ {"time":"2026-03-31T06:55:28.735368816Z","level":"INFO","msg":"handler: started","stream_id":"jxzjz5r3"}
4
+ {"time":"2026-03-31T06:55:28.735514769Z","level":"INFO","msg":"stream: started","id":"jxzjz5r3"}
5
+ {"time":"2026-03-31T06:55:28.735571676Z","level":"INFO","msg":"sender: started","stream_id":"jxzjz5r3"}
6
+ {"time":"2026-03-31T06:55:28.735597161Z","level":"INFO","msg":"writer: started","stream_id":"jxzjz5r3"}
7
+ {"time":"2026-03-31T07:01:16.531006817Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
8
+ {"time":"2026-03-31T07:01:16.691833668Z","level":"INFO","msg":"handler: operation stats","stats":{}}
9
+ {"time":"2026-03-31T07:01:16.698883138Z","level":"INFO","msg":"stream: closing","id":"jxzjz5r3"}
10
+ {"time":"2026-03-31T07:01:16.698899918Z","level":"INFO","msg":"handler: closed","stream_id":"jxzjz5r3"}
11
+ {"time":"2026-03-31T07:01:16.698949589Z","level":"INFO","msg":"sender: closed","stream_id":"jxzjz5r3"}
12
+ {"time":"2026-03-31T07:01:16.698960323Z","level":"INFO","msg":"stream: closed","id":"jxzjz5r3"}
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/logs/debug.log ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_setup.py:_flush():81] Current SDK version is 0.25.0
2
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_setup.py:_flush():81] Configure stats pid to 177
3
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_setup.py:_flush():81] Loading settings from environment variables
4
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /kaggle/working/wandb/run-20260331_065527-jxzjz5r3/logs/debug.log
5
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /kaggle/working/wandb/run-20260331_065527-jxzjz5r3/logs/debug-internal.log
6
+ 2026-03-31 06:55:27,790 INFO MainThread:177 [wandb_init.py:monkeypatch_ipython():636] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7e0b144d1940>
7
+ 2026-03-31 06:55:27,791 INFO MainThread:177 [wandb_init.py:init():844] calling init triggers
8
+ 2026-03-31 06:55:27,791 INFO MainThread:177 [wandb_init.py:init():849] wandb.init called with sweep_config: {}
9
+ config: {'_wandb': {}}
10
+ 2026-03-31 06:55:27,791 INFO MainThread:177 [wandb_init.py:init():892] starting backend
11
+ 2026-03-31 06:55:28,558 INFO MainThread:177 [wandb_init.py:init():895] sending inform_init request
12
+ 2026-03-31 06:55:28,569 INFO MainThread:177 [wandb_init.py:init():903] backend started and connected
13
+ 2026-03-31 06:55:28,577 INFO MainThread:177 [wandb_run.py:_label_probe_notebook():1333] probe notebook
14
+ 2026-03-31 06:55:34,034 INFO MainThread:177 [wandb_init.py:init():973] updated telemetry
15
+ 2026-03-31 06:55:34,270 INFO MainThread:177 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout
16
+ 2026-03-31 06:55:34,618 INFO MainThread:177 [wandb_init.py:init():1042] starting run threads in backend
17
+ 2026-03-31 06:55:35,321 INFO MainThread:177 [wandb_run.py:_console_start():2524] atexit reg
18
+ 2026-03-31 06:55:35,321 INFO MainThread:177 [wandb_run.py:_redirect():2373] redirect: wrap_raw
19
+ 2026-03-31 06:55:35,321 INFO MainThread:177 [wandb_run.py:_redirect():2442] Wrapping output streams.
20
+ 2026-03-31 06:55:35,321 INFO MainThread:177 [wandb_run.py:_redirect():2465] Redirects installed.
21
+ 2026-03-31 06:55:35,337 INFO MainThread:177 [wandb_init.py:init():1082] run started, returning control to user process
22
+ 2026-03-31 07:01:15,359 INFO MainThread:177 [wandb_run.py:_finish():2291] finishing run suvradeep-iit-guwahati-/credit-invisibility/jxzjz5r3
23
+ 2026-03-31 07:01:15,360 INFO MainThread:177 [jupyter.py:save_history():435] not saving jupyter history
24
+ 2026-03-31 07:01:15,360 INFO MainThread:177 [jupyter.py:save_ipynb():362] not saving jupyter notebook
25
+ 2026-03-31 07:01:15,360 INFO MainThread:177 [wandb_init.py:_jupyter_teardown():621] cleaning up jupyter logic
26
+ 2026-03-31 07:01:15,360 INFO MainThread:177 [wandb_run.py:_atexit_cleanup():2490] got exitcode: 0
27
+ 2026-03-31 07:01:15,361 INFO MainThread:177 [wandb_run.py:_restore():2472] restore
28
+ 2026-03-31 07:01:15,361 INFO MainThread:177 [wandb_run.py:_restore():2478] restore done
29
+ 2026-03-31 07:01:16,696 INFO MainThread:177 [wandb_run.py:_footer_sync_info():3868] logging synced files
kaggle_output/wandb/run-20260331_065527-jxzjz5r3/run-jxzjz5r3.wandb ADDED
Binary file (33.6 kB). View file
 
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/config.yaml ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.25.0
4
+ e:
5
+ s21p4sab0yaskqym8r37eeuqwjpzso9d:
6
+ codePath: kaggle.ipynb
7
+ cpu_count: 2
8
+ cpu_count_logical: 4
9
+ cudaVersion: "13.0"
10
+ disk:
11
+ /:
12
+ total: "8656922775552"
13
+ used: "7347653709824"
14
+ email: suvraadeep@gmail.com
15
+ executable: /usr/bin/python3
16
+ gpu: Tesla T4
17
+ gpu_count: 2
18
+ gpu_nvidia:
19
+ - architecture: Turing
20
+ cudaCores: 2560
21
+ memoryTotal: "16106127360"
22
+ name: Tesla T4
23
+ uuid: GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52
24
+ - architecture: Turing
25
+ cudaCores: 2560
26
+ memoryTotal: "16106127360"
27
+ name: Tesla T4
28
+ uuid: GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156
29
+ host: 7a129c53b2b5
30
+ memory:
31
+ total: "33662472192"
32
+ os: Linux-6.6.113+-x86_64-with-glibc2.35
33
+ program: kaggle.ipynb
34
+ python: CPython 3.12.12
35
+ root: /kaggle/working
36
+ startedAt: "2026-03-31T09:47:28.010618Z"
37
+ writerId: s21p4sab0yaskqym8r37eeuqwjpzso9d
38
+ m: []
39
+ python_version: 3.12.12
40
+ t:
41
+ "1":
42
+ - 1
43
+ - 5
44
+ - 6
45
+ - 8
46
+ - 11
47
+ - 35
48
+ - 49
49
+ - 53
50
+ - 54
51
+ - 71
52
+ - 75
53
+ - 105
54
+ "2":
55
+ - 1
56
+ - 5
57
+ - 6
58
+ - 8
59
+ - 11
60
+ - 35
61
+ - 49
62
+ - 53
63
+ - 54
64
+ - 71
65
+ - 75
66
+ - 105
67
+ "3":
68
+ - 2
69
+ - 13
70
+ - 15
71
+ - 16
72
+ "4": 3.12.12
73
+ "5": 0.25.0
74
+ "6": 4.41.2
75
+ "8":
76
+ - 1
77
+ - 2
78
+ - 12
79
+ "12": 0.25.0
80
+ "13": linux-x86_64
81
+ lgbm:
82
+ value:
83
+ bagging_fraction: 0.8170784332632994
84
+ bagging_freq: 1
85
+ boosting_type: gbdt
86
+ device: gpu
87
+ feature_fraction: 0.7123738038749523
88
+ force_col_wise: true
89
+ gpu_use_dp: false
90
+ learning_rate: 0.02273805573563183
91
+ max_bin: 255
92
+ max_depth: 9
93
+ metric: auc
94
+ min_child_samples: 82
95
+ min_gain_to_split: 0.0993578407670862
96
+ n_jobs: -1
97
+ num_leaves: 112
98
+ objective: binary
99
+ reg_alpha: 0.0002359137306347715
100
+ reg_lambda: 8.598737339212267
101
+ seed: 42
102
+ verbosity: -1
103
+ n_folds:
104
+ value: 5
105
+ xgb:
106
+ value:
107
+ colsample_bytree: 0.8226259232371986
108
+ device: cuda
109
+ eval_metric: auc
110
+ gamma: 0.26516420723672285
111
+ learning_rate: 0.028084447839949865
112
+ max_bin: 256
113
+ max_depth: 5
114
+ min_child_weight: 4
115
+ n_jobs: -1
116
+ objective: binary:logistic
117
+ reg_alpha: 8.576808828106026
118
+ reg_lambda: 6.598703417106125
119
+ scale_pos_weight: 1.3690073364184308
120
+ seed: 42
121
+ subsample: 0.6252365594243677
122
+ tree_method: hist
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/output.log ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ 🚀 Fold 1
3
+ [100] valid_0's auc: 0.765307
4
+ [200] valid_0's auc: 0.774084
5
+ [300] valid_0's auc: 0.777392
6
+ [400] valid_0's auc: 0.77907
7
+ [500] valid_0's auc: 0.780086
8
+ [600] valid_0's auc: 0.780793
9
+ [700] valid_0's auc: 0.781234
10
+ [800] valid_0's auc: 0.781458
11
+ [900] valid_0's auc: 0.781597
12
+ LGBM: 0.78174 | XGB: 0.78181
13
+
14
+ 🚀 Fold 2
15
+ [100] valid_0's auc: 0.773239
16
+ [200] valid_0's auc: 0.783192
17
+ [300] valid_0's auc: 0.786755
18
+ [400] valid_0's auc: 0.788592
19
+ [500] valid_0's auc: 0.78945
20
+ [600] valid_0's auc: 0.789662
21
+ [700] valid_0's auc: 0.789836
22
+ [800] valid_0's auc: 0.789936
23
+ LGBM: 0.79000 | XGB: 0.79209
24
+
25
+ 🚀 Fold 3
26
+ [100] valid_0's auc: 0.76562
27
+ [200] valid_0's auc: 0.774939
28
+ [300] valid_0's auc: 0.779674
29
+ [400] valid_0's auc: 0.7817
30
+ [500] valid_0's auc: 0.782736
31
+ [600] valid_0's auc: 0.78322
32
+ [700] valid_0's auc: 0.783531
33
+ [800] valid_0's auc: 0.783716
34
+ [900] valid_0's auc: 0.78377
35
+ LGBM: 0.78383 | XGB: 0.78411
36
+
37
+ 🚀 Fold 4
38
+ [100] valid_0's auc: 0.773374
39
+ [200] valid_0's auc: 0.782137
40
+ [300] valid_0's auc: 0.786103
41
+ [400] valid_0's auc: 0.78795
42
+ [500] valid_0's auc: 0.788917
43
+ [600] valid_0's auc: 0.789669
44
+ [700] valid_0's auc: 0.789976
45
+ [800] valid_0's auc: 0.790266
46
+ [900] valid_0's auc: 0.790474
47
+ [1000] valid_0's auc: 0.790344
48
+ LGBM: 0.79050 | XGB: 0.79068
49
+
50
+ 🚀 Fold 5
51
+ [100] valid_0's auc: 0.763112
52
+ [200] valid_0's auc: 0.773904
53
+ [300] valid_0's auc: 0.778297
54
+ [400] valid_0's auc: 0.780052
55
+ [500] valid_0's auc: 0.781518
56
+ [600] valid_0's auc: 0.782167
57
+ [700] valid_0's auc: 0.782807
58
+ [800] valid_0's auc: 0.782822
59
+ [900] valid_0's auc: 0.783009
60
+ [1000] valid_0's auc: 0.782948
61
+ LGBM: 0.78309 | XGB: 0.78362
62
+
63
+ 🔍 Optimizing blend weights...
64
+ ✅ Best weight → LGBM: 0.49, XGB: 0.51
65
+ 🏆 Best OOF AUC: 0.78731
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/requirements.txt ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==75.2.0
2
+ types-setuptools==80.10.0.20260124
3
+ requirements-parser==0.9.0
4
+ pip==24.1.2
5
+ logistro==2.0.1
6
+ tokenizers==0.19.1
7
+ huggingface_hub==0.36.2
8
+ scikit-learn==1.4.2
9
+ xgboost==2.0.3
10
+ sentence-transformers==2.7.0
11
+ choreographer==1.2.1
12
+ lightgbm==4.3.0
13
+ pytest-timeout==2.4.0
14
+ numpy==1.26.4
15
+ category-encoders==2.6.3
16
+ scipy==1.11.4
17
+ river==0.23.0
18
+ optuna-integration==4.8.0
19
+ kaleido==1.2.0
20
+ transformers==4.41.2
21
+ plotly==6.6.0
22
+ pytools==2025.2.5
23
+ pycuda==2026.1
24
+ siphash24==1.8
25
+ protobuf==5.29.5
26
+ torchtune==0.6.1
27
+ learntools==0.3.5
28
+ rouge_score==0.1.2
29
+ pyclipper==1.4.0
30
+ urwid_readline==0.15.1
31
+ h2o==3.46.0.10
32
+ rfc3161-client==1.0.5
33
+ blake3==1.0.8
34
+ mpld3==0.5.12
35
+ qgrid==1.3.1
36
+ ConfigSpace==1.2.2
37
+ woodwork==0.31.0
38
+ ujson==5.12.0
39
+ y-py==0.6.2
40
+ ipywidgets==8.1.5
41
+ scikit-multilearn==0.2.0
42
+ lightning-utilities==0.15.3
43
+ pytesseract==0.3.13
44
+ Cartopy==0.25.0
45
+ odfpy==1.4.1
46
+ Boruta==0.4.3
47
+ docstring-to-markdown==0.17
48
+ torchinfo==1.8.0
49
+ clint==0.5.1
50
+ comm==0.2.3
51
+ Deprecated==1.3.1
52
+ pymongo==4.16.0
53
+ tensorflow-io-gcs-filesystem==0.37.1
54
+ jmespath==1.1.0
55
+ pygltflib==1.16.5
56
+ keras-core==0.1.7
57
+ pandas==2.3.3
58
+ securesystemslib==1.3.1
59
+ ghapi==1.0.11
60
+ qtconsole==5.7.1
61
+ pyemd==2.0.0
62
+ pandas-profiling==3.6.6
63
+ nilearn==0.13.1
64
+ in-toto-attestation==0.9.3
65
+ a2a-sdk==0.3.25
66
+ keras-tuner==1.4.8
67
+ fastuuid==0.14.0
68
+ scikit-surprise==1.1.4
69
+ vtk==9.3.1
70
+ jupyter-ydoc==0.2.5
71
+ aiofiles==22.1.0
72
+ pytokens==0.4.1
73
+ featuretools==1.31.0
74
+ plotly-express==0.4.1
75
+ marshmallow==3.26.2
76
+ easyocr==1.7.2
77
+ ppft==1.7.8
78
+ openslide-bin==4.0.0.13
79
+ fuzzywuzzy==0.18.0
80
+ id==1.6.1
81
+ openslide-python==1.4.3
82
+ kaggle-environments==1.27.3
83
+ pyarrow==23.0.1
84
+ pandasql==0.7.3
85
+ update-checker==0.18.0
86
+ pathos==0.3.2
87
+ jupyter_server_fileid==0.9.3
88
+ fasttext==0.9.3
89
+ coverage==7.13.5
90
+ s3fs==2026.2.0
91
+ stopit==1.1.2
92
+ haversine==2.9.0
93
+ jupyter_server==2.12.5
94
+ geojson==3.2.0
95
+ botocore==1.42.70
96
+ fury==0.12.0
97
+ ipympl==0.10.0
98
+ ipython_pygments_lexers==1.1.1
99
+ olefile==0.47
100
+ jupyter_server_proxy==4.4.0
101
+ datasets==4.8.3
102
+ pytorch-ignite==0.5.3
103
+ xvfbwrapper==0.2.22
104
+ daal==2025.11.0
105
+ open_spiel==1.6.12
106
+ jupyter-lsp==1.5.1
107
+ trx-python==0.4.0
108
+ gpxpy==1.6.2
109
+ papermill==2.7.0
110
+ simpervisor==1.0.0
111
+ kagglehub==1.0.0
112
+ mlcrate==0.2.0
113
+ kaggle==2.0.0
114
+ dask-jobqueue==0.9.0
115
+ model-signing==1.1.1
116
+ jupyterlab==3.6.8
117
+ args==0.1.0
118
+ ImageHash==4.3.2
119
+ typing-inspect==0.9.0
120
+ PyUpSet==0.1.1.post7
121
+ dacite==1.9.2
122
+ pycryptodome==3.23.0
123
+ google-cloud-videointelligence==2.18.0
124
+ visions==0.8.1
125
+ deap==1.4.3
126
+ lml==0.2.0
127
+ jiter==0.10.0
128
+ ypy-websocket==0.8.4
129
+ cytoolz==1.1.0
130
+ path.py==12.5.0
131
+ tensorflow-io==0.37.1
132
+ wavio==0.0.9
133
+ pdf2image==1.17.0
134
+ line_profiler==5.0.2
135
+ fsspec==2026.2.0
136
+ aiobotocore==3.3.0
137
+ optuna==4.8.0
138
+ fastgit==0.0.4
139
+ litellm==1.82.4
140
+ pyLDAvis==3.4.1
141
+ Janome==0.5.0
142
+ langid==1.1.6
143
+ sigstore-models==0.0.6
144
+ pokerkit==0.6.3
145
+ pyaml==26.2.1
146
+ scikit-plot==0.3.7
147
+ nbdev==3.0.12
148
+ simpleitk==2.5.3
149
+ ml_collections==1.1.0
150
+ filetype==1.2.0
151
+ Wand==0.7.0
152
+ jupyter_server_ydoc==0.8.0
153
+ pyjson5==2.0.0
154
+ email-validator==2.3.0
155
+ execnb==0.1.18
156
+ colorama==0.4.6
157
+ ruamel.yaml==0.19.1
158
+ python-lsp-server==1.14.0
159
+ black==26.3.1
160
+ PyArabic==0.6.15
161
+ gymnasium==1.2.0
162
+ path==17.1.1
163
+ gensim==4.4.0
164
+ pypdf==6.9.1
165
+ TPOT==1.1.0
166
+ Pympler==1.1
167
+ bayesian-optimization==3.2.1
168
+ nbconvert==6.4.5
169
+ kornia==0.8.2
170
+ pathspec==1.0.4
171
+ pybind11==3.0.2
172
+ sigstore==4.2.0
173
+ funcy==2.0
174
+ func_timeout==4.3.5
175
+ testpath==0.6.0
176
+ aioitertools==0.13.0
177
+ google-cloud-vision==3.12.1
178
+ ray==2.54.0
179
+ kornia_rs==0.1.10
180
+ traitlets==5.14.3
181
+ gymnax==0.0.8
182
+ dnspython==2.8.0
183
+ chex==0.1.90
184
+ gym==0.26.2
185
+ nbclient==0.5.13
186
+ ydata-profiling==4.18.1
187
+ POT==0.9.6.post1
188
+ deepdiff==8.6.2
189
+ squarify==0.4.4
190
+ dataclasses-json==0.6.7
191
+ pettingzoo==1.24.0
192
+ pytorch-lightning==2.6.1
193
+ segment_anything==1.0
194
+ emoji==2.15.0
195
+ python-bidi==0.6.7
196
+ rgf-python==3.12.0
197
+ ninja==1.13.0
198
+ widgetsnbextension==4.0.15
199
+ minify_html==0.18.1
200
+ urwid==3.0.5
201
+ jedi==0.19.2
202
+ jupyterlab-lsp==3.10.2
203
+ python-lsp-jsonrpc==1.1.2
204
+ QtPy==2.4.3
205
+ pydicom==3.0.1
206
+ multimethod==1.12
207
+ torchmetrics==1.9.0
208
+ asttokens==3.0.1
209
+ docker==7.1.0
210
+ dask-expr==2.0.0
211
+ s3transfer==0.16.0
212
+ build==1.4.0
213
+ Shimmy==2.0.0
214
+ igraph==1.0.0
215
+ puremagic==2.1.0
216
+ jupyterlab_server==2.28.0
217
+ isoweek==1.3.3
218
+ texttable==1.7.0
219
+ kt-legacy==1.0.5
220
+ orderly-set==5.5.0
221
+ pyexcel-io==0.6.7
222
+ catboost==1.2.10
223
+ kagglesdk==0.1.16
224
+ mamba==0.11.3
225
+ dipy==1.12.0
226
+ colorlog==6.10.1
227
+ asn1crypto==1.5.1
228
+ pyexcel-ods==0.6.0
229
+ lime==0.2.0.1
230
+ pox==0.3.7
231
+ rfc8785==0.1.4
232
+ sigstore-rekor-types==0.0.18
233
+ cesium==0.12.4
234
+ boto3==1.42.70
235
+ tuf==6.0.0
236
+ hep_ml==0.8.0
237
+ pyproject_hooks==1.2.0
238
+ phik==0.12.5
239
+ pudb==2025.1.5
240
+ mne==1.11.0
241
+ keras-cv==0.9.0
242
+ dill==0.4.1
243
+ gatspy==0.3
244
+ scikit-learn-intelex==2025.11.0
245
+ onnx==1.20.1
246
+ scikit-optimize==0.10.2
247
+ mypy_extensions==1.1.0
248
+ mistune==0.8.4
249
+ json5==0.13.0
250
+ google-colab==1.0.0
251
+ psutil==5.9.5
252
+ jsonschema==4.26.0
253
+ astunparse==1.6.3
254
+ pycocotools==2.0.11
255
+ lxml==6.0.2
256
+ ipython==7.34.0
257
+ oauthlib==3.3.1
258
+ grpc-google-iam-v1==0.14.3
259
+ array_record==0.8.3
260
+ PuLP==3.3.0
261
+ nvidia-cuda-runtime-cu12==12.8.90
262
+ dask-cuda==26.2.0
263
+ immutabledict==4.3.1
264
+ peewee==4.0.0
265
+ fiona==1.10.1
266
+ aiosignal==1.4.0
267
+ libclang==18.1.1
268
+ annotated-types==0.7.0
269
+ spreg==1.8.5
270
+ grain==0.2.15
271
+ geemap==0.35.3
272
+ patsy==1.0.2
273
+ imagesize==1.4.1
274
+ py-cpuinfo==9.0.0
275
+ pyzmq==26.2.1
276
+ nvidia-cufile-cu12==1.13.1.3
277
+ multidict==6.7.1
278
+ srsly==2.5.2
279
+ intel-openmp==2025.3.2
280
+ uuid_utils==0.14.1
281
+ google-cloud-language==2.19.0
282
+ soxr==1.0.0
283
+ jupyterlab_pygments==0.3.0
284
+ backcall==0.2.0
285
+ tensorflow-hub==0.16.1
286
+ google==3.0.0
287
+ requests-oauthlib==2.0.0
288
+ dopamine_rl==4.1.2
289
+ overrides==7.7.0
290
+ db-dtypes==1.5.0
291
+ jeepney==0.9.0
292
+ langgraph-sdk==0.3.9
293
+ ipython-genutils==0.2.0
294
+ nvidia-cuda-cupti-cu12==12.8.90
295
+ libcugraph-cu12==26.2.0
296
+ catalogue==2.0.10
297
+ beautifulsoup4==4.13.5
298
+ nvidia-ml-py==13.590.48
299
+ sphinxcontrib-devhelp==2.0.0
300
+ partd==1.4.2
301
+ sklearn-pandas==2.2.0
302
+ sphinxcontrib-qthelp==2.0.0
303
+ google-cloud-spanner==3.63.0
304
+ h5py==3.15.1
305
+ python-box==7.4.1
306
+ distributed-ucxx-cu12==0.48.0
307
+ xlrd==2.0.2
308
+ branca==0.8.2
309
+ chardet==5.2.0
310
+ pycairo==1.29.0
311
+ Authlib==1.6.8
312
+ cuda-core==0.3.2
313
+ sentencepiece==0.2.1
314
+ nvidia-cusparselt-cu12==0.7.1
315
+ matplotlib-venn==1.1.2
316
+ scooby==0.11.0
317
+ fqdn==1.5.1
318
+ gin-config==0.5.0
319
+ ipython-sql==0.5.0
320
+ toml==0.10.2
321
+ PyOpenGL==3.1.10
322
+ weasel==0.4.3
323
+ jsonpointer==3.0.0
324
+ google-auth-httplib2==0.3.0
325
+ spint==1.0.7
326
+ nvtx==0.2.14
327
+ websocket-client==1.9.0
328
+ torchao==0.10.0
329
+ splot==1.1.7
330
+ langgraph-checkpoint==4.0.0
331
+ alabaster==1.0.0
332
+ jaxlib==0.7.2
333
+ google-resumable-media==2.8.0
334
+ namex==0.1.0
335
+ quantecon==0.11.0
336
+ nvidia-cuda-cccl-cu12==12.9.27
337
+ google-cloud-aiplatform==1.138.0
338
+ treelite==4.6.1
339
+ google-cloud-resource-manager==1.16.0
340
+ jupyter_core==5.9.1
341
+ spacy-legacy==3.0.12
342
+ librosa==0.11.0
343
+ ibis-framework==9.5.0
344
+ requests-toolbelt==1.0.0
345
+ smart_open==7.5.1
346
+ tensorflow-metadata==1.17.3
347
+ pysal==25.7
348
+ highspy==1.13.1
349
+ click==8.3.1
350
+ markdown-it-py==4.0.0
351
+ nvidia-cusolver-cu12==11.7.3.90
352
+ cupy-cuda12x==14.0.1
353
+ imutils==0.5.4
354
+ grpclib==0.4.9
355
+ opt_einsum==3.4.0
356
+ folium==0.20.0
357
+ moviepy==1.0.3
358
+ opencv-python==4.13.0.92
359
+ en_core_web_sm==3.8.0
360
+ tensorflow-text==2.19.0
361
+ langchain-core==1.2.15
362
+ yarl==1.22.0
363
+ spacy==3.8.11
364
+ importlib_resources==6.5.2
365
+ peft==0.18.1
366
+ lazy_loader==0.4
367
+ polars-runtime-32==1.35.2
368
+ pylibcudf-cu12==26.2.1
369
+ bigquery-magics==0.10.3
370
+ spanner-graph-notebook==1.1.8
371
+ sqlglot==25.20.2
372
+ linkify-it-py==2.0.3
373
+ types-pytz==2025.2.0.20251108
374
+ tifffile==2026.2.20
375
+ tsfresh==0.21.1
376
+ nbclassic==1.3.3
377
+ scikit-image==0.25.2
378
+ tensorflow_decision_forests==1.12.0
379
+ simsimd==6.5.13
380
+ isoduration==20.11.0
381
+ momepy==0.11.0
382
+ pytest==8.4.2
383
+ nvidia-cuda-nvcc-cu12==12.5.82
384
+ cuda-bindings==12.9.4
385
+ torchsummary==1.5.1
386
+ earthengine-api==1.5.24
387
+ webencodings==0.5.1
388
+ optree==0.19.0
389
+ jax-cuda12-pjrt==0.7.2
390
+ langchain==1.2.10
391
+ safehttpx==0.1.7
392
+ holidays==0.91
393
+ google-cloud-firestore==2.23.0
394
+ fastjsonschema==2.21.2
395
+ pymc==5.28.0
396
+ pydantic==2.12.3
397
+ jaraco.context==6.1.0
398
+ pyogrio==0.12.1
399
+ numba-cuda==0.22.2
400
+ fonttools==4.61.1
401
+ httpimport==1.4.1
402
+ rsa==4.9.1
403
+ tomlkit==0.13.3
404
+ entrypoints==0.4
405
+ anyio==4.12.1
406
+ charset-normalizer==3.4.4
407
+ pooch==1.9.0
408
+ libcuml-cu12==26.2.0
409
+ astropy-iers-data==0.2026.2.23.0.48.33
410
+ ipyleaflet==0.20.0
411
+ cryptography==43.0.3
412
+ missingno==0.5.2
413
+ langgraph==1.0.9
414
+ pandas-datareader==0.10.0
415
+ pyviz_comms==3.0.6
416
+ cycler==0.12.1
417
+ tensorboard==2.19.0
418
+ gast==0.7.0
419
+ jax-cuda12-plugin==0.7.2
420
+ platformdirs==4.9.2
421
+ google-genai==1.64.0
422
+ inflect==7.5.0
423
+ httplib2==0.31.2
424
+ h11==0.16.0
425
+ alembic==1.18.4
426
+ multitasking==0.0.12
427
+ rmm-cu12==26.2.0
428
+ cvxpy==1.6.7
429
+ affine==2.4.0
430
+ cuml-cu12==26.2.0
431
+ pyparsing==3.3.2
432
+ cffi==2.0.0
433
+ h5netcdf==1.8.1
434
+ Markdown==3.10.2
435
+ google-cloud-translate==3.24.0
436
+ rpy2==3.5.17
437
+ regex==2025.11.3
438
+ tf_keras==2.19.0
439
+ google-auth==2.47.0
440
+ nvidia-libnvcomp-cu12==5.1.0.21
441
+ Send2Trash==2.1.0
442
+ cymem==2.0.13
443
+ pylibraft-cu12==26.2.0
444
+ shap==0.50.0
445
+ shapely==2.1.2
446
+ psygnal==0.15.1
447
+ uri-template==1.3.0
448
+ parso==0.8.6
449
+ webcolors==25.10.0
450
+ nltk==3.9.1
451
+ atpublic==5.1
452
+ ImageIO==2.37.2
453
+ sphinxcontrib-applehelp==2.0.0
454
+ bigframes==2.35.0
455
+ pydot==4.0.1
456
+ onemkl-license==2025.3.1
457
+ treescope==0.1.10
458
+ tcmlib==1.4.1
459
+ opentelemetry-sdk==1.38.0
460
+ tiktoken==0.12.0
461
+ nibabel==5.3.3
462
+ multiprocess==0.70.16
463
+ typing_extensions==4.15.0
464
+ PyYAML==6.0.3
465
+ defusedxml==0.7.1
466
+ sphinxcontrib-serializinghtml==2.0.0
467
+ bleach==6.3.0
468
+ tenacity==9.1.4
469
+ python-utils==3.9.1
470
+ google-cloud-bigquery==3.40.1
471
+ google-cloud-bigquery-connection==1.20.0
472
+ opentelemetry-resourcedetector-gcp==1.11.0a0
473
+ ormsgpack==1.12.2
474
+ pydotplus==2.0.2
475
+ pycryptodomex==3.23.0
476
+ openai==2.23.0
477
+ matplotlib==3.10.0
478
+ ml_dtypes==0.5.4
479
+ uvloop==0.22.1
480
+ google-pasta==0.2.0
481
+ giddy==2.3.8
482
+ ipyparallel==8.8.0
483
+ keras==3.10.0
484
+ cuvs-cu12==26.2.0
485
+ mcp==1.26.0
486
+ spacy-loggers==1.0.5
487
+ google-cloud-logging==3.13.0
488
+ rfc3987-syntax==1.1.0
489
+ google-ai-generativelanguage==0.6.15
490
+ keras-hub==0.21.1
491
+ pydata-google-auth==1.9.1
492
+ absl-py==1.4.0
493
+ ydf==0.15.0
494
+ narwhals==2.17.0
495
+ nvidia-cusparse-cu12==12.5.8.93
496
+ openpyxl==3.1.5
497
+ nvidia-cublas-cu12==12.8.4.1
498
+ roman-numerals==4.1.0
499
+ vega-datasets==0.9.0
500
+ mpmath==1.3.0
501
+ etils==1.13.0
502
+ osqp==1.1.1
503
+ traittypes==0.2.3
504
+ opentelemetry-exporter-gcp-monitoring==1.11.0a0
505
+ graphviz==0.21
506
+ google-cloud-trace==1.18.0
507
+ einops==0.8.2
508
+ torchdata==0.11.0
509
+ jax==0.7.2
510
+ cachetools==6.2.6
511
+ aiohappyeyeballs==2.6.1
512
+ annotated-doc==0.0.4
513
+ starlette==0.52.1
514
+ fastapi==0.133.0
515
+ typer==0.24.1
516
+ duckdb==1.3.2
517
+ blinker==1.9.0
518
+ referencing==0.37.0
519
+ googledrivedownloader==1.1.0
520
+ GDAL==3.8.4
521
+ cuda-python==12.9.4
522
+ pycparser==3.0
523
+ et_xmlfile==2.0.0
524
+ jieba==0.42.1
525
+ zict==3.0.0
526
+ hyperopt==0.2.7
527
+ python-louvain==0.16
528
+ SQLAlchemy==2.0.47
529
+ cuda-toolkit==12.8.1
530
+ PyDrive2==1.21.3
531
+ roman-numerals-py==4.1.0
532
+ urllib3==2.5.0
533
+ jaraco.functools==4.4.0
534
+ optax==0.2.7
535
+ pyOpenSSL==24.2.1
536
+ jupyter-console==6.6.3
537
+ libkvikio-cu12==26.2.0
538
+ gspread==6.2.1
539
+ docstring_parser==0.17.0
540
+ albumentations==2.0.8
541
+ jupytext==1.19.1
542
+ seaborn==0.13.2
543
+ librmm-cu12==26.2.0
544
+ cons==0.4.7
545
+ matplotlib-inline==0.2.1
546
+ pynndescent==0.6.0
547
+ stringzilla==4.6.0
548
+ flatbuffers==25.12.19
549
+ omegaconf==2.3.0
550
+ umap-learn==0.5.11
551
+ progressbar2==4.5.0
552
+ pexpect==4.9.0
553
+ torchcodec==0.10.0+cu128
554
+ ptyprocess==0.7.0
555
+ pygame==2.6.1
556
+ kiwisolver==1.4.9
557
+ Cython==3.0.12
558
+ shellingham==1.5.4
559
+ soupsieve==2.8.3
560
+ snowballstemmer==3.0.1
561
+ propcache==0.4.1
562
+ ucxx-cu12==0.48.0
563
+ nbformat==5.10.4
564
+ python-snappy==0.7.3
565
+ rasterstats==0.20.0
566
+ bqplot==0.12.45
567
+ nest-asyncio==1.6.0
568
+ opencv-python-headless==4.13.0.92
569
+ notebook==6.5.7
570
+ flax==0.11.2
571
+ google-cloud-functions==1.22.0
572
+ multipledispatch==1.0.0
573
+ googleapis-common-protos==1.72.0
574
+ eerepr==0.1.2
575
+ torchaudio==2.10.0+cu128
576
+ locket==1.0.0
577
+ prettytable==3.17.0
578
+ pygit2==1.19.1
579
+ fastai==2.8.7
580
+ msgpack==1.1.2
581
+ clarabel==0.11.1
582
+ cligj==0.7.2
583
+ google-cloud-secret-manager==2.26.0
584
+ spglm==1.1.0
585
+ ipytree==0.2.2
586
+ termcolor==3.3.0
587
+ tweepy==4.16.0
588
+ google-cloud-core==2.5.0
589
+ dataproc-spark-connect==1.0.2
590
+ mkl==2025.3.1
591
+ umf==1.0.3
592
+ textblob==0.19.0
593
+ firebase-admin==6.9.0
594
+ simple-parsing==0.1.8
595
+ debugpy==1.8.15
596
+ google-cloud-discoveryengine==0.13.12
597
+ fastcore==1.12.16
598
+ decorator==4.4.2
599
+ pickleshare==0.7.5
600
+ rasterio==1.5.0
601
+ networkx==3.6.1
602
+ typer-slim==0.24.0
603
+ wasabi==1.1.3
604
+ mgwr==2.2.1
605
+ hdbscan==0.8.41
606
+ pydub==0.25.1
607
+ tobler==0.13.0
608
+ more-itertools==10.8.0
609
+ keyrings.google-artifactregistry-auth==1.1.2
610
+ cloudpickle==3.1.2
611
+ nvidia-nvtx-cu12==12.8.90
612
+ fastlite==0.2.4
613
+ colorcet==3.1.0
614
+ lark==1.3.1
615
+ antlr4-python3-runtime==4.9.3
616
+ keras-nlp==0.21.1
617
+ music21==9.9.1
618
+ Pygments==2.19.2
619
+ triton==3.6.0
620
+ toolz==0.12.1
621
+ python-slugify==8.0.4
622
+ sqlparse==0.5.5
623
+ jupyter-leaflet==0.20.0
624
+ gym-notices==0.1.0
625
+ torchvision==0.25.0+cu128
626
+ prophet==1.3.0
627
+ google-cloud-datastore==2.23.0
628
+ semantic-version==2.10.0
629
+ fastprogress==1.1.5
630
+ etuples==0.3.10
631
+ pyspark==4.0.2
632
+ orjson==3.11.7
633
+ terminado==0.18.1
634
+ accelerate==1.12.0
635
+ panel==1.8.7
636
+ apswutils==0.1.2
637
+ pyproj==3.7.2
638
+ sphinxcontrib-htmlhelp==2.1.0
639
+ certifi==2026.1.4
640
+ grpc-interceptor==0.15.4
641
+ pyasn1==0.6.2
642
+ geocoder==1.38.1
643
+ idna==3.11
644
+ mizani==0.13.5
645
+ jupyter_server_terminals==0.5.4
646
+ httpcore==1.0.9
647
+ pyasn1_modules==0.4.2
648
+ ffmpy==1.0.0
649
+ pyperclip==1.11.0
650
+ safetensors==0.7.0
651
+ ndindex==1.10.1
652
+ tblib==3.2.2
653
+ docutils==0.21.2
654
+ scs==3.2.11
655
+ distro==1.9.0
656
+ tf-slim==1.1.0
657
+ babel==2.18.0
658
+ google-cloud-pubsub==2.35.0
659
+ google-api-python-client==2.190.0
660
+ tzlocal==5.3.1
661
+ groovy==0.1.2
662
+ plum-dispatch==2.7.1
663
+ dask==2026.1.1
664
+ blosc2==4.0.0
665
+ sqlalchemy-spanner==1.17.2
666
+ orbax-checkpoint==0.11.33
667
+ wandb==0.25.0
668
+ geopandas==1.1.2
669
+ proglog==0.1.12
670
+ python-dateutil==2.9.0.post0
671
+ tzdata==2025.3
672
+ editdistance==0.8.1
673
+ langsmith==0.7.6
674
+ xarray-einstats==0.10.0
675
+ pydantic_core==2.41.4
676
+ tabulate==0.9.0
677
+ mmh3==5.2.0
678
+ sentry-sdk==2.53.0
679
+ spopt==0.7.0
680
+ dlib==19.24.6
681
+ community==1.0.0b1
682
+ tensorflow==2.19.0
683
+ ale-py==0.11.2
684
+ murmurhash==1.0.15
685
+ notebook_shim==0.2.4
686
+ mdurl==0.1.2
687
+ diffusers==0.36.0
688
+ requests==2.32.4
689
+ Flask==3.1.3
690
+ prometheus_client==0.24.1
691
+ uvicorn==0.41.0
692
+ logical-unification==0.4.7
693
+ soundfile==0.13.1
694
+ itsdangerous==2.2.0
695
+ jsonpatch==1.33
696
+ plotnine==0.14.5
697
+ distributed==2026.1.1
698
+ google-auth-oauthlib==1.2.4
699
+ gdown==5.2.1
700
+ brotli==1.2.0
701
+ py4j==0.10.9.9
702
+ pytensor==2.38.0
703
+ text-unidecode==1.3
704
+ yfinance==0.2.66
705
+ arviz==0.22.0
706
+ cudf-cu12==26.2.1
707
+ wordcloud==1.9.6
708
+ jaraco.classes==3.4.0
709
+ albucore==0.0.24
710
+ python-dotenv==1.2.1
711
+ uritemplate==4.2.0
712
+ nx-cugraph-cu12==26.2.0
713
+ raft-dask-cu12==26.2.0
714
+ hpack==4.1.0
715
+ numexpr==2.14.1
716
+ pydantic-settings==2.13.1
717
+ rapids-logger==0.2.3
718
+ cmake==3.31.10
719
+ pillow==11.3.0
720
+ jsonschema-specifications==2025.9.1
721
+ tables==3.10.2
722
+ google-cloud-storage==3.9.0
723
+ mapclassify==2.10.0
724
+ altair==5.5.0
725
+ filelock==3.24.3
726
+ google-cloud-appengine-logging==1.8.0
727
+ cufflinks==0.17.3
728
+ cvxopt==1.3.2
729
+ six==1.17.0
730
+ watchdog==6.0.0
731
+ sse-starlette==3.2.0
732
+ PySocks==1.7.1
733
+ jupyterlab_widgets==3.0.16
734
+ spaghetti==1.7.6
735
+ intel-cmplr-lib-ur==2025.3.2
736
+ uc-micro-py==1.0.3
737
+ Sphinx==8.2.3
738
+ PyJWT==2.11.0
739
+ google-cloud-bigtable==2.35.0
740
+ numba==0.60.0
741
+ httptools==0.7.1
742
+ rich==13.9.4
743
+ pointpats==2.5.5
744
+ watchfiles==1.1.1
745
+ promise==2.3
746
+ polars==1.35.2
747
+ greenlet==3.3.2
748
+ rfc3986-validator==0.1.1
749
+ threadpoolctl==3.6.0
750
+ opentelemetry-exporter-otlp-proto-http==1.38.0
751
+ libcuvs-cu12==26.2.0
752
+ sniffio==1.3.1
753
+ pylibcugraph-cu12==26.2.0
754
+ holoviews==1.22.1
755
+ pandas-gbq==0.30.0
756
+ frozenlist==1.8.0
757
+ google-crc32c==1.8.0
758
+ torch==2.10.0+cu128
759
+ ipyevents==2.0.4
760
+ libucxx-cu12==0.48.0
761
+ cramjam==2.11.0
762
+ opentelemetry-exporter-otlp-proto-common==1.38.0
763
+ wurlitzer==3.1.1
764
+ confection==0.1.5
765
+ stanio==0.5.1
766
+ easydict==1.13
767
+ argon2-cffi==25.1.0
768
+ llvmlite==0.43.0
769
+ humanize==4.15.0
770
+ rapids-dask-dependency==26.2.0
771
+ argon2-cffi-bindings==25.1.0
772
+ future==1.0.0
773
+ rpds-py==0.30.0
774
+ psycopg2==2.9.11
775
+ iniconfig==2.3.0
776
+ jupyter-events==0.12.0
777
+ nvidia-nccl-cu12==2.27.5
778
+ GitPython==3.1.46
779
+ joblib==1.5.3
780
+ beartype==0.22.9
781
+ hf-xet==1.3.0
782
+ Bottleneck==1.4.2
783
+ apsw==3.51.2.0
784
+ bokeh==3.8.2
785
+ google-cloud-dataproc==5.25.0
786
+ nvidia-cuda-nvrtc-cu12==12.8.93
787
+ colour==0.1.5
788
+ zipp==3.23.0
789
+ blis==1.3.3
790
+ click-plugins==1.1.1.2
791
+ httpx-sse==0.4.3
792
+ nvidia-nvshmem-cu12==3.4.5
793
+ sphinxcontrib-jsmath==1.0.1
794
+ prompt_toolkit==3.0.52
795
+ esda==2.8.1
796
+ param==2.3.2
797
+ google-cloud-speech==2.36.1
798
+ portpicker==1.5.2
799
+ PyWavelets==1.9.0
800
+ google-cloud-monitoring==2.29.1
801
+ Farama-Notifications==0.0.4
802
+ pytz==2025.2
803
+ MarkupSafe==3.0.3
804
+ pyomo==6.10.0
805
+ packaging==26.0
806
+ betterproto==2.0.0b6
807
+ libraft-cu12==26.2.0
808
+ typeguard==4.5.1
809
+ imbalanced-learn==0.14.1
810
+ google-adk==1.25.1
811
+ CacheControl==0.14.4
812
+ ipykernel==6.17.1
813
+ jsonpickle==4.1.1
814
+ xyzservices==2025.11.0
815
+ websockets==15.0.1
816
+ PyGObject==3.48.2
817
+ pandas-stubs==2.2.2.240909
818
+ proto-plus==1.27.1
819
+ segregation==2.5.3
820
+ ratelim==0.1.6
821
+ miniKanren==1.0.5
822
+ geographiclib==2.1
823
+ Jinja2==3.1.6
824
+ frozendict==2.4.7
825
+ libcudf-cu12==26.2.1
826
+ nvidia-cufft-cu12==11.3.3.83
827
+ typing-inspection==0.4.2
828
+ gradio_client==1.14.0
829
+ simplejson==3.20.2
830
+ ruff==0.15.2
831
+ imageio-ffmpeg==0.6.0
832
+ python-json-logger==4.0.0
833
+ cucim-cu12==26.2.0
834
+ jupyter_kernel_gateway==2.5.2
835
+ contourpy==1.3.3
836
+ google-api-core==2.30.0
837
+ opencv-contrib-python==4.13.0.92
838
+ nvidia-cudnn-cu12==9.10.2.21
839
+ opentelemetry-proto==1.38.0
840
+ dask-cudf-cu12==26.2.1
841
+ nvidia-nvimgcodec-cu12==0.7.0.11
842
+ statsmodels==0.14.6
843
+ opentelemetry-exporter-gcp-trace==1.11.0
844
+ deprecation==2.1.0
845
+ tinycss2==1.4.0
846
+ mdit-py-plugins==0.5.0
847
+ tensorflow-datasets==4.9.9
848
+ opentelemetry-api==1.38.0
849
+ langgraph-prebuilt==1.0.8
850
+ keyring==25.7.0
851
+ inequality==1.1.2
852
+ cyipopt==1.5.0
853
+ sympy==1.14.0
854
+ oauth2client==4.1.3
855
+ python-fasthtml==0.12.47
856
+ gspread-dataframe==4.0.0
857
+ wcwidth==0.6.0
858
+ geopy==2.4.1
859
+ natsort==8.4.0
860
+ timm==1.0.25
861
+ rfc3339-validator==0.1.4
862
+ stumpy==1.13.0
863
+ parsy==2.2
864
+ libucx-cu12==1.19.0
865
+ pyerfa==2.0.1.5
866
+ astropy==7.2.0
867
+ curl_cffi==0.14.0
868
+ xarray==2025.12.0
869
+ preshed==3.0.12
870
+ Werkzeug==3.1.6
871
+ SecretStorage==3.5.0
872
+ grpcio==1.78.1
873
+ slicer==0.0.8
874
+ cudf-polars-cu12==26.2.1
875
+ aiosqlite==0.22.1
876
+ grpcio-status==1.71.2
877
+ libpysal==4.14.1
878
+ gitdb==4.0.12
879
+ hyperframe==6.1.0
880
+ opentelemetry-semantic-conventions==0.59b0
881
+ wheel==0.46.3
882
+ h2==4.3.0
883
+ google-cloud-audit-log==0.4.0
884
+ tqdm==4.67.3
885
+ httpx==0.28.1
886
+ cloudpathlib==0.23.0
887
+ thinc==8.3.10
888
+ audioread==3.1.0
889
+ fastdownload==0.0.7
890
+ gcsfs==2025.3.0
891
+ nvidia-nvjitlink-cu12==12.8.93
892
+ access==1.1.10.post3
893
+ tornado==6.5.1
894
+ pandocfilters==1.5.1
895
+ fasttransform==0.0.2
896
+ nvidia-curand-cu12==10.3.9.90
897
+ python-multipart==0.0.22
898
+ yellowbrick==1.5
899
+ jupyter_client==7.4.9
900
+ google-generativeai==0.8.6
901
+ blobfile==3.2.0
902
+ importlib_metadata==8.7.1
903
+ tensorboard-data-server==0.7.2
904
+ attrs==25.4.0
905
+ tbb==2022.3.1
906
+ pluggy==1.6.0
907
+ cuda-pathfinder==1.3.5
908
+ rtree==1.4.1
909
+ arrow==1.4.0
910
+ wrapt==2.1.1
911
+ anywidget==0.9.21
912
+ mlxtend==0.23.4
913
+ smmap==5.0.2
914
+ aiohttp==3.13.3
915
+ opentelemetry-exporter-gcp-logging==1.11.0a0
916
+ sortedcontainers==2.4.0
917
+ pyshp==3.0.3
918
+ sklearn-compat==0.1.5
919
+ xxhash==3.6.0
920
+ zstandard==0.25.0
921
+ Mako==1.3.10
922
+ google-cloud-iam==2.21.0
923
+ autograd==1.8.0
924
+ glob2==0.7
925
+ tensorstore==0.1.81
926
+ tensorflow-probability==0.25.0
927
+ colorlover==0.3.0
928
+ ipyfilechooser==0.6.0
929
+ gradio==5.50.0
930
+ cmdstanpy==1.3.0
931
+ dm-tree==0.1.9
932
+ html5lib==1.1
933
+ python-apt==0.0.0
934
+ PyGObject==3.42.1
935
+ blinker==1.4
936
+ jeepney==0.7.1
937
+ six==1.16.0
938
+ oauthlib==3.2.0
939
+ wadllib==1.3.6
940
+ launchpadlib==1.10.16
941
+ dbus-python==1.2.18
942
+ PyJWT==2.3.0
943
+ importlib-metadata==4.6.4
944
+ httplib2==0.20.2
945
+ zipp==1.0.0
946
+ pyparsing==2.4.7
947
+ lazr.restfulclient==0.14.4
948
+ SecretStorage==3.3.1
949
+ distro==1.7.0
950
+ lazr.uri==1.0.6
951
+ more-itertools==8.10.0
952
+ python-apt==2.4.0+ubuntu4.1
953
+ cryptography==3.4.8
954
+ keyring==23.5.0
955
+ Markdown==3.3.6
956
+ Mako==1.1.3
957
+ MarkupSafe==2.0.1
958
+ packaging==24.1
959
+ inflect==7.3.1
960
+ autocommand==2.2.2
961
+ typeguard==4.3.0
962
+ jaraco.text==3.12.1
963
+ importlib_resources==6.4.0
964
+ wheel==0.43.0
965
+ zipp==3.19.2
966
+ platformdirs==4.2.2
967
+ importlib_metadata==8.0.0
968
+ tomli==2.0.1
969
+ jaraco.collections==5.1.0
970
+ more-itertools==10.3.0
971
+ typing_extensions==4.12.2
972
+ backports.tarfile==1.2.0
973
+ jaraco.functools==4.0.1
974
+ jaraco.context==5.3.0
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/wandb-metadata.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.6.113+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.12.12",
4
+ "startedAt": "2026-03-31T09:47:28.010618Z",
5
+ "program": "kaggle.ipynb",
6
+ "codePath": "kaggle.ipynb",
7
+ "email": "suvraadeep@gmail.com",
8
+ "root": "/kaggle/working",
9
+ "host": "7a129c53b2b5",
10
+ "executable": "/usr/bin/python3",
11
+ "cpu_count": 2,
12
+ "cpu_count_logical": 4,
13
+ "gpu": "Tesla T4",
14
+ "gpu_count": 2,
15
+ "disk": {
16
+ "/": {
17
+ "total": "8656922775552",
18
+ "used": "7347653709824"
19
+ }
20
+ },
21
+ "memory": {
22
+ "total": "33662472192"
23
+ },
24
+ "gpu_nvidia": [
25
+ {
26
+ "name": "Tesla T4",
27
+ "memoryTotal": "16106127360",
28
+ "cudaCores": 2560,
29
+ "architecture": "Turing",
30
+ "uuid": "GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52"
31
+ },
32
+ {
33
+ "name": "Tesla T4",
34
+ "memoryTotal": "16106127360",
35
+ "cudaCores": 2560,
36
+ "architecture": "Turing",
37
+ "uuid": "GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156"
38
+ }
39
+ ],
40
+ "cudaVersion": "13.0",
41
+ "writerId": "s21p4sab0yaskqym8r37eeuqwjpzso9d"
42
+ }
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":653},"final_oof_auc":0.7873122800363673,"lgbm_weight":0.49,"xgb_weight":0.51,"_timestamp":1.7749511069771621e+09,"_step":0,"_runtime":653}
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-03-31T09:47:28.022227997Z","level":"INFO","msg":"stream: starting","core version":"0.25.0"}
2
+ {"time":"2026-03-31T09:47:28.143809348Z","level":"INFO","msg":"stream: created new stream","id":"v7xaa9j7"}
3
+ {"time":"2026-03-31T09:47:28.143904318Z","level":"INFO","msg":"handler: started","stream_id":"v7xaa9j7"}
4
+ {"time":"2026-03-31T09:47:28.144018966Z","level":"INFO","msg":"stream: started","id":"v7xaa9j7"}
5
+ {"time":"2026-03-31T09:47:28.144050221Z","level":"INFO","msg":"writer: started","stream_id":"v7xaa9j7"}
6
+ {"time":"2026-03-31T09:47:28.144080736Z","level":"INFO","msg":"sender: started","stream_id":"v7xaa9j7"}
7
+ {"time":"2026-03-31T09:58:27.365155501Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
8
+ {"time":"2026-03-31T09:58:27.509723149Z","level":"INFO","msg":"handler: operation stats","stats":{}}
9
+ {"time":"2026-03-31T09:58:27.515863816Z","level":"INFO","msg":"stream: closing","id":"v7xaa9j7"}
10
+ {"time":"2026-03-31T09:58:27.515880856Z","level":"INFO","msg":"handler: closed","stream_id":"v7xaa9j7"}
11
+ {"time":"2026-03-31T09:58:27.516043135Z","level":"INFO","msg":"sender: closed","stream_id":"v7xaa9j7"}
12
+ {"time":"2026-03-31T09:58:27.51608442Z","level":"INFO","msg":"stream: closed","id":"v7xaa9j7"}
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/logs/debug.log ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2026-03-31 09:47:28,019 INFO MainThread:177 [wandb_init.py:setup_run_log_directory():717] Logging user logs to /kaggle/working/wandb/run-20260331_094728-v7xaa9j7/logs/debug.log
2
+ 2026-03-31 09:47:28,019 INFO MainThread:177 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to /kaggle/working/wandb/run-20260331_094728-v7xaa9j7/logs/debug-internal.log
3
+ 2026-03-31 09:47:28,019 INFO MainThread:177 [wandb_init.py:monkeypatch_ipython():636] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7e0be8427da0>
4
+ 2026-03-31 09:47:28,020 INFO MainThread:177 [wandb_init.py:init():844] calling init triggers
5
+ 2026-03-31 09:47:28,020 INFO MainThread:177 [wandb_init.py:init():849] wandb.init called with sweep_config: {}
6
+ config: {'lgbm': {'boosting_type': 'gbdt', 'num_leaves': 112, 'learning_rate': 0.02273805573563183, 'feature_fraction': 0.7123738038749523, 'bagging_fraction': 0.8170784332632994, 'bagging_freq': 1, 'min_child_samples': 82, 'reg_alpha': 0.0002359137306347715, 'reg_lambda': 8.598737339212267, 'max_depth': 9, 'min_gain_to_split': 0.0993578407670862, 'objective': 'binary', 'metric': 'auc', 'verbosity': -1, 'n_jobs': -1, 'seed': 42, 'device': 'gpu', 'max_bin': 255, 'gpu_use_dp': False, 'force_col_wise': True}, 'xgb': {'learning_rate': 0.028084447839949865, 'max_depth': 5, 'min_child_weight': 4, 'subsample': 0.6252365594243677, 'colsample_bytree': 0.8226259232371986, 'gamma': 0.26516420723672285, 'reg_alpha': 8.576808828106026, 'reg_lambda': 6.598703417106125, 'scale_pos_weight': 1.3690073364184308, 'objective': 'binary:logistic', 'eval_metric': 'auc', 'tree_method': 'hist', 'seed': 42, 'n_jobs': -1, 'device': 'cuda', 'max_bin': 256}, 'n_folds': 5, '_wandb': {}}
7
+ 2026-03-31 09:47:28,020 INFO MainThread:177 [wandb_init.py:init():892] starting backend
8
+ 2026-03-31 09:47:28,020 INFO MainThread:177 [wandb_init.py:init():895] sending inform_init request
9
+ 2026-03-31 09:47:28,021 INFO MainThread:177 [wandb_init.py:init():903] backend started and connected
10
+ 2026-03-31 09:47:28,029 INFO MainThread:177 [wandb_run.py:_label_probe_notebook():1333] probe notebook
11
+ 2026-03-31 09:47:33,411 INFO MainThread:177 [wandb_init.py:init():973] updated telemetry
12
+ 2026-03-31 09:47:33,412 INFO MainThread:177 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout
13
+ 2026-03-31 09:47:33,771 INFO MainThread:177 [wandb_init.py:init():1042] starting run threads in backend
14
+ 2026-03-31 09:47:34,481 INFO MainThread:177 [wandb_run.py:_console_start():2524] atexit reg
15
+ 2026-03-31 09:47:34,481 INFO MainThread:177 [wandb_run.py:_redirect():2373] redirect: wrap_raw
16
+ 2026-03-31 09:47:34,481 INFO MainThread:177 [wandb_run.py:_redirect():2442] Wrapping output streams.
17
+ 2026-03-31 09:47:34,482 INFO MainThread:177 [wandb_run.py:_redirect():2465] Redirects installed.
18
+ 2026-03-31 09:47:34,484 INFO MainThread:177 [wandb_init.py:init():1082] run started, returning control to user process
19
+ 2026-03-31 09:58:26,977 INFO MainThread:177 [wandb_run.py:_finish():2291] finishing run suvradeep-iit-guwahati-/credit-invisibility/v7xaa9j7
20
+ 2026-03-31 09:58:26,978 INFO MainThread:177 [jupyter.py:save_history():435] not saving jupyter history
21
+ 2026-03-31 09:58:26,978 INFO MainThread:177 [jupyter.py:save_ipynb():362] not saving jupyter notebook
22
+ 2026-03-31 09:58:26,978 INFO MainThread:177 [wandb_init.py:_jupyter_teardown():621] cleaning up jupyter logic
23
+ 2026-03-31 09:58:26,978 INFO MainThread:177 [wandb_run.py:_atexit_cleanup():2490] got exitcode: 0
24
+ 2026-03-31 09:58:26,978 INFO MainThread:177 [wandb_run.py:_restore():2472] restore
25
+ 2026-03-31 09:58:26,979 INFO MainThread:177 [wandb_run.py:_restore():2478] restore done
26
+ 2026-03-31 09:58:27,513 INFO MainThread:177 [wandb_run.py:_footer_sync_info():3868] logging synced files
kaggle_output/wandb/run-20260331_094728-v7xaa9j7/run-v7xaa9j7.wandb ADDED
Binary file (62.8 kB). View file
 
kaggle_output/wandb/run-20260331_095935-cu87492i/files/config.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.25.0
4
+ e:
5
+ 8a2fdkg7z7uti6wkzjkth5zruzlcpawp:
6
+ codePath: kaggle.ipynb
7
+ cpu_count: 2
8
+ cpu_count_logical: 4
9
+ cudaVersion: "13.0"
10
+ disk:
11
+ /:
12
+ total: "8656922775552"
13
+ used: "7347718606848"
14
+ email: suvraadeep@gmail.com
15
+ executable: /usr/bin/python3
16
+ gpu: Tesla T4
17
+ gpu_count: 2
18
+ gpu_nvidia:
19
+ - architecture: Turing
20
+ cudaCores: 2560
21
+ memoryTotal: "16106127360"
22
+ name: Tesla T4
23
+ uuid: GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52
24
+ - architecture: Turing
25
+ cudaCores: 2560
26
+ memoryTotal: "16106127360"
27
+ name: Tesla T4
28
+ uuid: GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156
29
+ host: 7a129c53b2b5
30
+ memory:
31
+ total: "33662472192"
32
+ os: Linux-6.6.113+-x86_64-with-glibc2.35
33
+ program: kaggle.ipynb
34
+ python: CPython 3.12.12
35
+ root: /kaggle/working
36
+ startedAt: "2026-03-31T09:59:35.665971Z"
37
+ writerId: 8a2fdkg7z7uti6wkzjkth5zruzlcpawp
38
+ m: []
39
+ python_version: 3.12.12
40
+ t:
41
+ "1":
42
+ - 1
43
+ - 2
44
+ - 3
45
+ - 5
46
+ - 6
47
+ - 8
48
+ - 11
49
+ - 12
50
+ - 35
51
+ - 49
52
+ - 53
53
+ - 54
54
+ - 71
55
+ - 75
56
+ - 105
57
+ "2":
58
+ - 1
59
+ - 2
60
+ - 3
61
+ - 5
62
+ - 6
63
+ - 8
64
+ - 11
65
+ - 12
66
+ - 35
67
+ - 49
68
+ - 53
69
+ - 54
70
+ - 71
71
+ - 75
72
+ - 105
73
+ "3":
74
+ - 2
75
+ - 13
76
+ - 15
77
+ "4": 3.12.12
78
+ "5": 0.25.0
79
+ "6": 4.41.2
80
+ "8":
81
+ - 1
82
+ - 2
83
+ - 12
84
+ "12": 0.25.0
85
+ "13": linux-x86_64
kaggle_output/wandb/run-20260331_095935-cu87492i/files/output.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [LightGBM] [Fatal] The number of features in data (30) is not the same as it was in training data (234).
2
+ You can set ``predict_disable_shape_check=true`` to discard this error, but please be aware what you are doing.
3
+ 📉 Simulating Concept Drift...
kaggle_output/wandb/run-20260331_095935-cu87492i/files/requirements.txt ADDED
@@ -0,0 +1,974 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==75.2.0
2
+ types-setuptools==80.10.0.20260124
3
+ requirements-parser==0.9.0
4
+ pip==24.1.2
5
+ logistro==2.0.1
6
+ tokenizers==0.19.1
7
+ huggingface_hub==0.36.2
8
+ scikit-learn==1.4.2
9
+ xgboost==2.0.3
10
+ sentence-transformers==2.7.0
11
+ choreographer==1.2.1
12
+ lightgbm==4.3.0
13
+ pytest-timeout==2.4.0
14
+ numpy==1.26.4
15
+ category-encoders==2.6.3
16
+ scipy==1.11.4
17
+ river==0.23.0
18
+ optuna-integration==4.8.0
19
+ kaleido==1.2.0
20
+ transformers==4.41.2
21
+ plotly==6.6.0
22
+ pytools==2025.2.5
23
+ pycuda==2026.1
24
+ siphash24==1.8
25
+ protobuf==5.29.5
26
+ torchtune==0.6.1
27
+ learntools==0.3.5
28
+ rouge_score==0.1.2
29
+ pyclipper==1.4.0
30
+ urwid_readline==0.15.1
31
+ h2o==3.46.0.10
32
+ rfc3161-client==1.0.5
33
+ blake3==1.0.8
34
+ mpld3==0.5.12
35
+ qgrid==1.3.1
36
+ ConfigSpace==1.2.2
37
+ woodwork==0.31.0
38
+ ujson==5.12.0
39
+ y-py==0.6.2
40
+ ipywidgets==8.1.5
41
+ scikit-multilearn==0.2.0
42
+ lightning-utilities==0.15.3
43
+ pytesseract==0.3.13
44
+ Cartopy==0.25.0
45
+ odfpy==1.4.1
46
+ Boruta==0.4.3
47
+ docstring-to-markdown==0.17
48
+ torchinfo==1.8.0
49
+ clint==0.5.1
50
+ comm==0.2.3
51
+ Deprecated==1.3.1
52
+ pymongo==4.16.0
53
+ tensorflow-io-gcs-filesystem==0.37.1
54
+ jmespath==1.1.0
55
+ pygltflib==1.16.5
56
+ keras-core==0.1.7
57
+ pandas==2.3.3
58
+ securesystemslib==1.3.1
59
+ ghapi==1.0.11
60
+ qtconsole==5.7.1
61
+ pyemd==2.0.0
62
+ pandas-profiling==3.6.6
63
+ nilearn==0.13.1
64
+ in-toto-attestation==0.9.3
65
+ a2a-sdk==0.3.25
66
+ keras-tuner==1.4.8
67
+ fastuuid==0.14.0
68
+ scikit-surprise==1.1.4
69
+ vtk==9.3.1
70
+ jupyter-ydoc==0.2.5
71
+ aiofiles==22.1.0
72
+ pytokens==0.4.1
73
+ featuretools==1.31.0
74
+ plotly-express==0.4.1
75
+ marshmallow==3.26.2
76
+ easyocr==1.7.2
77
+ ppft==1.7.8
78
+ openslide-bin==4.0.0.13
79
+ fuzzywuzzy==0.18.0
80
+ id==1.6.1
81
+ openslide-python==1.4.3
82
+ kaggle-environments==1.27.3
83
+ pyarrow==23.0.1
84
+ pandasql==0.7.3
85
+ update-checker==0.18.0
86
+ pathos==0.3.2
87
+ jupyter_server_fileid==0.9.3
88
+ fasttext==0.9.3
89
+ coverage==7.13.5
90
+ s3fs==2026.2.0
91
+ stopit==1.1.2
92
+ haversine==2.9.0
93
+ jupyter_server==2.12.5
94
+ geojson==3.2.0
95
+ botocore==1.42.70
96
+ fury==0.12.0
97
+ ipympl==0.10.0
98
+ ipython_pygments_lexers==1.1.1
99
+ olefile==0.47
100
+ jupyter_server_proxy==4.4.0
101
+ datasets==4.8.3
102
+ pytorch-ignite==0.5.3
103
+ xvfbwrapper==0.2.22
104
+ daal==2025.11.0
105
+ open_spiel==1.6.12
106
+ jupyter-lsp==1.5.1
107
+ trx-python==0.4.0
108
+ gpxpy==1.6.2
109
+ papermill==2.7.0
110
+ simpervisor==1.0.0
111
+ kagglehub==1.0.0
112
+ mlcrate==0.2.0
113
+ kaggle==2.0.0
114
+ dask-jobqueue==0.9.0
115
+ model-signing==1.1.1
116
+ jupyterlab==3.6.8
117
+ args==0.1.0
118
+ ImageHash==4.3.2
119
+ typing-inspect==0.9.0
120
+ PyUpSet==0.1.1.post7
121
+ dacite==1.9.2
122
+ pycryptodome==3.23.0
123
+ google-cloud-videointelligence==2.18.0
124
+ visions==0.8.1
125
+ deap==1.4.3
126
+ lml==0.2.0
127
+ jiter==0.10.0
128
+ ypy-websocket==0.8.4
129
+ cytoolz==1.1.0
130
+ path.py==12.5.0
131
+ tensorflow-io==0.37.1
132
+ wavio==0.0.9
133
+ pdf2image==1.17.0
134
+ line_profiler==5.0.2
135
+ fsspec==2026.2.0
136
+ aiobotocore==3.3.0
137
+ optuna==4.8.0
138
+ fastgit==0.0.4
139
+ litellm==1.82.4
140
+ pyLDAvis==3.4.1
141
+ Janome==0.5.0
142
+ langid==1.1.6
143
+ sigstore-models==0.0.6
144
+ pokerkit==0.6.3
145
+ pyaml==26.2.1
146
+ scikit-plot==0.3.7
147
+ nbdev==3.0.12
148
+ simpleitk==2.5.3
149
+ ml_collections==1.1.0
150
+ filetype==1.2.0
151
+ Wand==0.7.0
152
+ jupyter_server_ydoc==0.8.0
153
+ pyjson5==2.0.0
154
+ email-validator==2.3.0
155
+ execnb==0.1.18
156
+ colorama==0.4.6
157
+ ruamel.yaml==0.19.1
158
+ python-lsp-server==1.14.0
159
+ black==26.3.1
160
+ PyArabic==0.6.15
161
+ gymnasium==1.2.0
162
+ path==17.1.1
163
+ gensim==4.4.0
164
+ pypdf==6.9.1
165
+ TPOT==1.1.0
166
+ Pympler==1.1
167
+ bayesian-optimization==3.2.1
168
+ nbconvert==6.4.5
169
+ kornia==0.8.2
170
+ pathspec==1.0.4
171
+ pybind11==3.0.2
172
+ sigstore==4.2.0
173
+ funcy==2.0
174
+ func_timeout==4.3.5
175
+ testpath==0.6.0
176
+ aioitertools==0.13.0
177
+ google-cloud-vision==3.12.1
178
+ ray==2.54.0
179
+ kornia_rs==0.1.10
180
+ traitlets==5.14.3
181
+ gymnax==0.0.8
182
+ dnspython==2.8.0
183
+ chex==0.1.90
184
+ gym==0.26.2
185
+ nbclient==0.5.13
186
+ ydata-profiling==4.18.1
187
+ POT==0.9.6.post1
188
+ deepdiff==8.6.2
189
+ squarify==0.4.4
190
+ dataclasses-json==0.6.7
191
+ pettingzoo==1.24.0
192
+ pytorch-lightning==2.6.1
193
+ segment_anything==1.0
194
+ emoji==2.15.0
195
+ python-bidi==0.6.7
196
+ rgf-python==3.12.0
197
+ ninja==1.13.0
198
+ widgetsnbextension==4.0.15
199
+ minify_html==0.18.1
200
+ urwid==3.0.5
201
+ jedi==0.19.2
202
+ jupyterlab-lsp==3.10.2
203
+ python-lsp-jsonrpc==1.1.2
204
+ QtPy==2.4.3
205
+ pydicom==3.0.1
206
+ multimethod==1.12
207
+ torchmetrics==1.9.0
208
+ asttokens==3.0.1
209
+ docker==7.1.0
210
+ dask-expr==2.0.0
211
+ s3transfer==0.16.0
212
+ build==1.4.0
213
+ Shimmy==2.0.0
214
+ igraph==1.0.0
215
+ puremagic==2.1.0
216
+ jupyterlab_server==2.28.0
217
+ isoweek==1.3.3
218
+ texttable==1.7.0
219
+ kt-legacy==1.0.5
220
+ orderly-set==5.5.0
221
+ pyexcel-io==0.6.7
222
+ catboost==1.2.10
223
+ kagglesdk==0.1.16
224
+ mamba==0.11.3
225
+ dipy==1.12.0
226
+ colorlog==6.10.1
227
+ asn1crypto==1.5.1
228
+ pyexcel-ods==0.6.0
229
+ lime==0.2.0.1
230
+ pox==0.3.7
231
+ rfc8785==0.1.4
232
+ sigstore-rekor-types==0.0.18
233
+ cesium==0.12.4
234
+ boto3==1.42.70
235
+ tuf==6.0.0
236
+ hep_ml==0.8.0
237
+ pyproject_hooks==1.2.0
238
+ phik==0.12.5
239
+ pudb==2025.1.5
240
+ mne==1.11.0
241
+ keras-cv==0.9.0
242
+ dill==0.4.1
243
+ gatspy==0.3
244
+ scikit-learn-intelex==2025.11.0
245
+ onnx==1.20.1
246
+ scikit-optimize==0.10.2
247
+ mypy_extensions==1.1.0
248
+ mistune==0.8.4
249
+ json5==0.13.0
250
+ google-colab==1.0.0
251
+ psutil==5.9.5
252
+ jsonschema==4.26.0
253
+ astunparse==1.6.3
254
+ pycocotools==2.0.11
255
+ lxml==6.0.2
256
+ ipython==7.34.0
257
+ oauthlib==3.3.1
258
+ grpc-google-iam-v1==0.14.3
259
+ array_record==0.8.3
260
+ PuLP==3.3.0
261
+ nvidia-cuda-runtime-cu12==12.8.90
262
+ dask-cuda==26.2.0
263
+ immutabledict==4.3.1
264
+ peewee==4.0.0
265
+ fiona==1.10.1
266
+ aiosignal==1.4.0
267
+ libclang==18.1.1
268
+ annotated-types==0.7.0
269
+ spreg==1.8.5
270
+ grain==0.2.15
271
+ geemap==0.35.3
272
+ patsy==1.0.2
273
+ imagesize==1.4.1
274
+ py-cpuinfo==9.0.0
275
+ pyzmq==26.2.1
276
+ nvidia-cufile-cu12==1.13.1.3
277
+ multidict==6.7.1
278
+ srsly==2.5.2
279
+ intel-openmp==2025.3.2
280
+ uuid_utils==0.14.1
281
+ google-cloud-language==2.19.0
282
+ soxr==1.0.0
283
+ jupyterlab_pygments==0.3.0
284
+ backcall==0.2.0
285
+ tensorflow-hub==0.16.1
286
+ google==3.0.0
287
+ requests-oauthlib==2.0.0
288
+ dopamine_rl==4.1.2
289
+ overrides==7.7.0
290
+ db-dtypes==1.5.0
291
+ jeepney==0.9.0
292
+ langgraph-sdk==0.3.9
293
+ ipython-genutils==0.2.0
294
+ nvidia-cuda-cupti-cu12==12.8.90
295
+ libcugraph-cu12==26.2.0
296
+ catalogue==2.0.10
297
+ beautifulsoup4==4.13.5
298
+ nvidia-ml-py==13.590.48
299
+ sphinxcontrib-devhelp==2.0.0
300
+ partd==1.4.2
301
+ sklearn-pandas==2.2.0
302
+ sphinxcontrib-qthelp==2.0.0
303
+ google-cloud-spanner==3.63.0
304
+ h5py==3.15.1
305
+ python-box==7.4.1
306
+ distributed-ucxx-cu12==0.48.0
307
+ xlrd==2.0.2
308
+ branca==0.8.2
309
+ chardet==5.2.0
310
+ pycairo==1.29.0
311
+ Authlib==1.6.8
312
+ cuda-core==0.3.2
313
+ sentencepiece==0.2.1
314
+ nvidia-cusparselt-cu12==0.7.1
315
+ matplotlib-venn==1.1.2
316
+ scooby==0.11.0
317
+ fqdn==1.5.1
318
+ gin-config==0.5.0
319
+ ipython-sql==0.5.0
320
+ toml==0.10.2
321
+ PyOpenGL==3.1.10
322
+ weasel==0.4.3
323
+ jsonpointer==3.0.0
324
+ google-auth-httplib2==0.3.0
325
+ spint==1.0.7
326
+ nvtx==0.2.14
327
+ websocket-client==1.9.0
328
+ torchao==0.10.0
329
+ splot==1.1.7
330
+ langgraph-checkpoint==4.0.0
331
+ alabaster==1.0.0
332
+ jaxlib==0.7.2
333
+ google-resumable-media==2.8.0
334
+ namex==0.1.0
335
+ quantecon==0.11.0
336
+ nvidia-cuda-cccl-cu12==12.9.27
337
+ google-cloud-aiplatform==1.138.0
338
+ treelite==4.6.1
339
+ google-cloud-resource-manager==1.16.0
340
+ jupyter_core==5.9.1
341
+ spacy-legacy==3.0.12
342
+ librosa==0.11.0
343
+ ibis-framework==9.5.0
344
+ requests-toolbelt==1.0.0
345
+ smart_open==7.5.1
346
+ tensorflow-metadata==1.17.3
347
+ pysal==25.7
348
+ highspy==1.13.1
349
+ click==8.3.1
350
+ markdown-it-py==4.0.0
351
+ nvidia-cusolver-cu12==11.7.3.90
352
+ cupy-cuda12x==14.0.1
353
+ imutils==0.5.4
354
+ grpclib==0.4.9
355
+ opt_einsum==3.4.0
356
+ folium==0.20.0
357
+ moviepy==1.0.3
358
+ opencv-python==4.13.0.92
359
+ en_core_web_sm==3.8.0
360
+ tensorflow-text==2.19.0
361
+ langchain-core==1.2.15
362
+ yarl==1.22.0
363
+ spacy==3.8.11
364
+ importlib_resources==6.5.2
365
+ peft==0.18.1
366
+ lazy_loader==0.4
367
+ polars-runtime-32==1.35.2
368
+ pylibcudf-cu12==26.2.1
369
+ bigquery-magics==0.10.3
370
+ spanner-graph-notebook==1.1.8
371
+ sqlglot==25.20.2
372
+ linkify-it-py==2.0.3
373
+ types-pytz==2025.2.0.20251108
374
+ tifffile==2026.2.20
375
+ tsfresh==0.21.1
376
+ nbclassic==1.3.3
377
+ scikit-image==0.25.2
378
+ tensorflow_decision_forests==1.12.0
379
+ simsimd==6.5.13
380
+ isoduration==20.11.0
381
+ momepy==0.11.0
382
+ pytest==8.4.2
383
+ nvidia-cuda-nvcc-cu12==12.5.82
384
+ cuda-bindings==12.9.4
385
+ torchsummary==1.5.1
386
+ earthengine-api==1.5.24
387
+ webencodings==0.5.1
388
+ optree==0.19.0
389
+ jax-cuda12-pjrt==0.7.2
390
+ langchain==1.2.10
391
+ safehttpx==0.1.7
392
+ holidays==0.91
393
+ google-cloud-firestore==2.23.0
394
+ fastjsonschema==2.21.2
395
+ pymc==5.28.0
396
+ pydantic==2.12.3
397
+ jaraco.context==6.1.0
398
+ pyogrio==0.12.1
399
+ numba-cuda==0.22.2
400
+ fonttools==4.61.1
401
+ httpimport==1.4.1
402
+ rsa==4.9.1
403
+ tomlkit==0.13.3
404
+ entrypoints==0.4
405
+ anyio==4.12.1
406
+ charset-normalizer==3.4.4
407
+ pooch==1.9.0
408
+ libcuml-cu12==26.2.0
409
+ astropy-iers-data==0.2026.2.23.0.48.33
410
+ ipyleaflet==0.20.0
411
+ cryptography==43.0.3
412
+ missingno==0.5.2
413
+ langgraph==1.0.9
414
+ pandas-datareader==0.10.0
415
+ pyviz_comms==3.0.6
416
+ cycler==0.12.1
417
+ tensorboard==2.19.0
418
+ gast==0.7.0
419
+ jax-cuda12-plugin==0.7.2
420
+ platformdirs==4.9.2
421
+ google-genai==1.64.0
422
+ inflect==7.5.0
423
+ httplib2==0.31.2
424
+ h11==0.16.0
425
+ alembic==1.18.4
426
+ multitasking==0.0.12
427
+ rmm-cu12==26.2.0
428
+ cvxpy==1.6.7
429
+ affine==2.4.0
430
+ cuml-cu12==26.2.0
431
+ pyparsing==3.3.2
432
+ cffi==2.0.0
433
+ h5netcdf==1.8.1
434
+ Markdown==3.10.2
435
+ google-cloud-translate==3.24.0
436
+ rpy2==3.5.17
437
+ regex==2025.11.3
438
+ tf_keras==2.19.0
439
+ google-auth==2.47.0
440
+ nvidia-libnvcomp-cu12==5.1.0.21
441
+ Send2Trash==2.1.0
442
+ cymem==2.0.13
443
+ pylibraft-cu12==26.2.0
444
+ shap==0.50.0
445
+ shapely==2.1.2
446
+ psygnal==0.15.1
447
+ uri-template==1.3.0
448
+ parso==0.8.6
449
+ webcolors==25.10.0
450
+ nltk==3.9.1
451
+ atpublic==5.1
452
+ ImageIO==2.37.2
453
+ sphinxcontrib-applehelp==2.0.0
454
+ bigframes==2.35.0
455
+ pydot==4.0.1
456
+ onemkl-license==2025.3.1
457
+ treescope==0.1.10
458
+ tcmlib==1.4.1
459
+ opentelemetry-sdk==1.38.0
460
+ tiktoken==0.12.0
461
+ nibabel==5.3.3
462
+ multiprocess==0.70.16
463
+ typing_extensions==4.15.0
464
+ PyYAML==6.0.3
465
+ defusedxml==0.7.1
466
+ sphinxcontrib-serializinghtml==2.0.0
467
+ bleach==6.3.0
468
+ tenacity==9.1.4
469
+ python-utils==3.9.1
470
+ google-cloud-bigquery==3.40.1
471
+ google-cloud-bigquery-connection==1.20.0
472
+ opentelemetry-resourcedetector-gcp==1.11.0a0
473
+ ormsgpack==1.12.2
474
+ pydotplus==2.0.2
475
+ pycryptodomex==3.23.0
476
+ openai==2.23.0
477
+ matplotlib==3.10.0
478
+ ml_dtypes==0.5.4
479
+ uvloop==0.22.1
480
+ google-pasta==0.2.0
481
+ giddy==2.3.8
482
+ ipyparallel==8.8.0
483
+ keras==3.10.0
484
+ cuvs-cu12==26.2.0
485
+ mcp==1.26.0
486
+ spacy-loggers==1.0.5
487
+ google-cloud-logging==3.13.0
488
+ rfc3987-syntax==1.1.0
489
+ google-ai-generativelanguage==0.6.15
490
+ keras-hub==0.21.1
491
+ pydata-google-auth==1.9.1
492
+ absl-py==1.4.0
493
+ ydf==0.15.0
494
+ narwhals==2.17.0
495
+ nvidia-cusparse-cu12==12.5.8.93
496
+ openpyxl==3.1.5
497
+ nvidia-cublas-cu12==12.8.4.1
498
+ roman-numerals==4.1.0
499
+ vega-datasets==0.9.0
500
+ mpmath==1.3.0
501
+ etils==1.13.0
502
+ osqp==1.1.1
503
+ traittypes==0.2.3
504
+ opentelemetry-exporter-gcp-monitoring==1.11.0a0
505
+ graphviz==0.21
506
+ google-cloud-trace==1.18.0
507
+ einops==0.8.2
508
+ torchdata==0.11.0
509
+ jax==0.7.2
510
+ cachetools==6.2.6
511
+ aiohappyeyeballs==2.6.1
512
+ annotated-doc==0.0.4
513
+ starlette==0.52.1
514
+ fastapi==0.133.0
515
+ typer==0.24.1
516
+ duckdb==1.3.2
517
+ blinker==1.9.0
518
+ referencing==0.37.0
519
+ googledrivedownloader==1.1.0
520
+ GDAL==3.8.4
521
+ cuda-python==12.9.4
522
+ pycparser==3.0
523
+ et_xmlfile==2.0.0
524
+ jieba==0.42.1
525
+ zict==3.0.0
526
+ hyperopt==0.2.7
527
+ python-louvain==0.16
528
+ SQLAlchemy==2.0.47
529
+ cuda-toolkit==12.8.1
530
+ PyDrive2==1.21.3
531
+ roman-numerals-py==4.1.0
532
+ urllib3==2.5.0
533
+ jaraco.functools==4.4.0
534
+ optax==0.2.7
535
+ pyOpenSSL==24.2.1
536
+ jupyter-console==6.6.3
537
+ libkvikio-cu12==26.2.0
538
+ gspread==6.2.1
539
+ docstring_parser==0.17.0
540
+ albumentations==2.0.8
541
+ jupytext==1.19.1
542
+ seaborn==0.13.2
543
+ librmm-cu12==26.2.0
544
+ cons==0.4.7
545
+ matplotlib-inline==0.2.1
546
+ pynndescent==0.6.0
547
+ stringzilla==4.6.0
548
+ flatbuffers==25.12.19
549
+ omegaconf==2.3.0
550
+ umap-learn==0.5.11
551
+ progressbar2==4.5.0
552
+ pexpect==4.9.0
553
+ torchcodec==0.10.0+cu128
554
+ ptyprocess==0.7.0
555
+ pygame==2.6.1
556
+ kiwisolver==1.4.9
557
+ Cython==3.0.12
558
+ shellingham==1.5.4
559
+ soupsieve==2.8.3
560
+ snowballstemmer==3.0.1
561
+ propcache==0.4.1
562
+ ucxx-cu12==0.48.0
563
+ nbformat==5.10.4
564
+ python-snappy==0.7.3
565
+ rasterstats==0.20.0
566
+ bqplot==0.12.45
567
+ nest-asyncio==1.6.0
568
+ opencv-python-headless==4.13.0.92
569
+ notebook==6.5.7
570
+ flax==0.11.2
571
+ google-cloud-functions==1.22.0
572
+ multipledispatch==1.0.0
573
+ googleapis-common-protos==1.72.0
574
+ eerepr==0.1.2
575
+ torchaudio==2.10.0+cu128
576
+ locket==1.0.0
577
+ prettytable==3.17.0
578
+ pygit2==1.19.1
579
+ fastai==2.8.7
580
+ msgpack==1.1.2
581
+ clarabel==0.11.1
582
+ cligj==0.7.2
583
+ google-cloud-secret-manager==2.26.0
584
+ spglm==1.1.0
585
+ ipytree==0.2.2
586
+ termcolor==3.3.0
587
+ tweepy==4.16.0
588
+ google-cloud-core==2.5.0
589
+ dataproc-spark-connect==1.0.2
590
+ mkl==2025.3.1
591
+ umf==1.0.3
592
+ textblob==0.19.0
593
+ firebase-admin==6.9.0
594
+ simple-parsing==0.1.8
595
+ debugpy==1.8.15
596
+ google-cloud-discoveryengine==0.13.12
597
+ fastcore==1.12.16
598
+ decorator==4.4.2
599
+ pickleshare==0.7.5
600
+ rasterio==1.5.0
601
+ networkx==3.6.1
602
+ typer-slim==0.24.0
603
+ wasabi==1.1.3
604
+ mgwr==2.2.1
605
+ hdbscan==0.8.41
606
+ pydub==0.25.1
607
+ tobler==0.13.0
608
+ more-itertools==10.8.0
609
+ keyrings.google-artifactregistry-auth==1.1.2
610
+ cloudpickle==3.1.2
611
+ nvidia-nvtx-cu12==12.8.90
612
+ fastlite==0.2.4
613
+ colorcet==3.1.0
614
+ lark==1.3.1
615
+ antlr4-python3-runtime==4.9.3
616
+ keras-nlp==0.21.1
617
+ music21==9.9.1
618
+ Pygments==2.19.2
619
+ triton==3.6.0
620
+ toolz==0.12.1
621
+ python-slugify==8.0.4
622
+ sqlparse==0.5.5
623
+ jupyter-leaflet==0.20.0
624
+ gym-notices==0.1.0
625
+ torchvision==0.25.0+cu128
626
+ prophet==1.3.0
627
+ google-cloud-datastore==2.23.0
628
+ semantic-version==2.10.0
629
+ fastprogress==1.1.5
630
+ etuples==0.3.10
631
+ pyspark==4.0.2
632
+ orjson==3.11.7
633
+ terminado==0.18.1
634
+ accelerate==1.12.0
635
+ panel==1.8.7
636
+ apswutils==0.1.2
637
+ pyproj==3.7.2
638
+ sphinxcontrib-htmlhelp==2.1.0
639
+ certifi==2026.1.4
640
+ grpc-interceptor==0.15.4
641
+ pyasn1==0.6.2
642
+ geocoder==1.38.1
643
+ idna==3.11
644
+ mizani==0.13.5
645
+ jupyter_server_terminals==0.5.4
646
+ httpcore==1.0.9
647
+ pyasn1_modules==0.4.2
648
+ ffmpy==1.0.0
649
+ pyperclip==1.11.0
650
+ safetensors==0.7.0
651
+ ndindex==1.10.1
652
+ tblib==3.2.2
653
+ docutils==0.21.2
654
+ scs==3.2.11
655
+ distro==1.9.0
656
+ tf-slim==1.1.0
657
+ babel==2.18.0
658
+ google-cloud-pubsub==2.35.0
659
+ google-api-python-client==2.190.0
660
+ tzlocal==5.3.1
661
+ groovy==0.1.2
662
+ plum-dispatch==2.7.1
663
+ dask==2026.1.1
664
+ blosc2==4.0.0
665
+ sqlalchemy-spanner==1.17.2
666
+ orbax-checkpoint==0.11.33
667
+ wandb==0.25.0
668
+ geopandas==1.1.2
669
+ proglog==0.1.12
670
+ python-dateutil==2.9.0.post0
671
+ tzdata==2025.3
672
+ editdistance==0.8.1
673
+ langsmith==0.7.6
674
+ xarray-einstats==0.10.0
675
+ pydantic_core==2.41.4
676
+ tabulate==0.9.0
677
+ mmh3==5.2.0
678
+ sentry-sdk==2.53.0
679
+ spopt==0.7.0
680
+ dlib==19.24.6
681
+ community==1.0.0b1
682
+ tensorflow==2.19.0
683
+ ale-py==0.11.2
684
+ murmurhash==1.0.15
685
+ notebook_shim==0.2.4
686
+ mdurl==0.1.2
687
+ diffusers==0.36.0
688
+ requests==2.32.4
689
+ Flask==3.1.3
690
+ prometheus_client==0.24.1
691
+ uvicorn==0.41.0
692
+ logical-unification==0.4.7
693
+ soundfile==0.13.1
694
+ itsdangerous==2.2.0
695
+ jsonpatch==1.33
696
+ plotnine==0.14.5
697
+ distributed==2026.1.1
698
+ google-auth-oauthlib==1.2.4
699
+ gdown==5.2.1
700
+ brotli==1.2.0
701
+ py4j==0.10.9.9
702
+ pytensor==2.38.0
703
+ text-unidecode==1.3
704
+ yfinance==0.2.66
705
+ arviz==0.22.0
706
+ cudf-cu12==26.2.1
707
+ wordcloud==1.9.6
708
+ jaraco.classes==3.4.0
709
+ albucore==0.0.24
710
+ python-dotenv==1.2.1
711
+ uritemplate==4.2.0
712
+ nx-cugraph-cu12==26.2.0
713
+ raft-dask-cu12==26.2.0
714
+ hpack==4.1.0
715
+ numexpr==2.14.1
716
+ pydantic-settings==2.13.1
717
+ rapids-logger==0.2.3
718
+ cmake==3.31.10
719
+ pillow==11.3.0
720
+ jsonschema-specifications==2025.9.1
721
+ tables==3.10.2
722
+ google-cloud-storage==3.9.0
723
+ mapclassify==2.10.0
724
+ altair==5.5.0
725
+ filelock==3.24.3
726
+ google-cloud-appengine-logging==1.8.0
727
+ cufflinks==0.17.3
728
+ cvxopt==1.3.2
729
+ six==1.17.0
730
+ watchdog==6.0.0
731
+ sse-starlette==3.2.0
732
+ PySocks==1.7.1
733
+ jupyterlab_widgets==3.0.16
734
+ spaghetti==1.7.6
735
+ intel-cmplr-lib-ur==2025.3.2
736
+ uc-micro-py==1.0.3
737
+ Sphinx==8.2.3
738
+ PyJWT==2.11.0
739
+ google-cloud-bigtable==2.35.0
740
+ numba==0.60.0
741
+ httptools==0.7.1
742
+ rich==13.9.4
743
+ pointpats==2.5.5
744
+ watchfiles==1.1.1
745
+ promise==2.3
746
+ polars==1.35.2
747
+ greenlet==3.3.2
748
+ rfc3986-validator==0.1.1
749
+ threadpoolctl==3.6.0
750
+ opentelemetry-exporter-otlp-proto-http==1.38.0
751
+ libcuvs-cu12==26.2.0
752
+ sniffio==1.3.1
753
+ pylibcugraph-cu12==26.2.0
754
+ holoviews==1.22.1
755
+ pandas-gbq==0.30.0
756
+ frozenlist==1.8.0
757
+ google-crc32c==1.8.0
758
+ torch==2.10.0+cu128
759
+ ipyevents==2.0.4
760
+ libucxx-cu12==0.48.0
761
+ cramjam==2.11.0
762
+ opentelemetry-exporter-otlp-proto-common==1.38.0
763
+ wurlitzer==3.1.1
764
+ confection==0.1.5
765
+ stanio==0.5.1
766
+ easydict==1.13
767
+ argon2-cffi==25.1.0
768
+ llvmlite==0.43.0
769
+ humanize==4.15.0
770
+ rapids-dask-dependency==26.2.0
771
+ argon2-cffi-bindings==25.1.0
772
+ future==1.0.0
773
+ rpds-py==0.30.0
774
+ psycopg2==2.9.11
775
+ iniconfig==2.3.0
776
+ jupyter-events==0.12.0
777
+ nvidia-nccl-cu12==2.27.5
778
+ GitPython==3.1.46
779
+ joblib==1.5.3
780
+ beartype==0.22.9
781
+ hf-xet==1.3.0
782
+ Bottleneck==1.4.2
783
+ apsw==3.51.2.0
784
+ bokeh==3.8.2
785
+ google-cloud-dataproc==5.25.0
786
+ nvidia-cuda-nvrtc-cu12==12.8.93
787
+ colour==0.1.5
788
+ zipp==3.23.0
789
+ blis==1.3.3
790
+ click-plugins==1.1.1.2
791
+ httpx-sse==0.4.3
792
+ nvidia-nvshmem-cu12==3.4.5
793
+ sphinxcontrib-jsmath==1.0.1
794
+ prompt_toolkit==3.0.52
795
+ esda==2.8.1
796
+ param==2.3.2
797
+ google-cloud-speech==2.36.1
798
+ portpicker==1.5.2
799
+ PyWavelets==1.9.0
800
+ google-cloud-monitoring==2.29.1
801
+ Farama-Notifications==0.0.4
802
+ pytz==2025.2
803
+ MarkupSafe==3.0.3
804
+ pyomo==6.10.0
805
+ packaging==26.0
806
+ betterproto==2.0.0b6
807
+ libraft-cu12==26.2.0
808
+ typeguard==4.5.1
809
+ imbalanced-learn==0.14.1
810
+ google-adk==1.25.1
811
+ CacheControl==0.14.4
812
+ ipykernel==6.17.1
813
+ jsonpickle==4.1.1
814
+ xyzservices==2025.11.0
815
+ websockets==15.0.1
816
+ PyGObject==3.48.2
817
+ pandas-stubs==2.2.2.240909
818
+ proto-plus==1.27.1
819
+ segregation==2.5.3
820
+ ratelim==0.1.6
821
+ miniKanren==1.0.5
822
+ geographiclib==2.1
823
+ Jinja2==3.1.6
824
+ frozendict==2.4.7
825
+ libcudf-cu12==26.2.1
826
+ nvidia-cufft-cu12==11.3.3.83
827
+ typing-inspection==0.4.2
828
+ gradio_client==1.14.0
829
+ simplejson==3.20.2
830
+ ruff==0.15.2
831
+ imageio-ffmpeg==0.6.0
832
+ python-json-logger==4.0.0
833
+ cucim-cu12==26.2.0
834
+ jupyter_kernel_gateway==2.5.2
835
+ contourpy==1.3.3
836
+ google-api-core==2.30.0
837
+ opencv-contrib-python==4.13.0.92
838
+ nvidia-cudnn-cu12==9.10.2.21
839
+ opentelemetry-proto==1.38.0
840
+ dask-cudf-cu12==26.2.1
841
+ nvidia-nvimgcodec-cu12==0.7.0.11
842
+ statsmodels==0.14.6
843
+ opentelemetry-exporter-gcp-trace==1.11.0
844
+ deprecation==2.1.0
845
+ tinycss2==1.4.0
846
+ mdit-py-plugins==0.5.0
847
+ tensorflow-datasets==4.9.9
848
+ opentelemetry-api==1.38.0
849
+ langgraph-prebuilt==1.0.8
850
+ keyring==25.7.0
851
+ inequality==1.1.2
852
+ cyipopt==1.5.0
853
+ sympy==1.14.0
854
+ oauth2client==4.1.3
855
+ python-fasthtml==0.12.47
856
+ gspread-dataframe==4.0.0
857
+ wcwidth==0.6.0
858
+ geopy==2.4.1
859
+ natsort==8.4.0
860
+ timm==1.0.25
861
+ rfc3339-validator==0.1.4
862
+ stumpy==1.13.0
863
+ parsy==2.2
864
+ libucx-cu12==1.19.0
865
+ pyerfa==2.0.1.5
866
+ astropy==7.2.0
867
+ curl_cffi==0.14.0
868
+ xarray==2025.12.0
869
+ preshed==3.0.12
870
+ Werkzeug==3.1.6
871
+ SecretStorage==3.5.0
872
+ grpcio==1.78.1
873
+ slicer==0.0.8
874
+ cudf-polars-cu12==26.2.1
875
+ aiosqlite==0.22.1
876
+ grpcio-status==1.71.2
877
+ libpysal==4.14.1
878
+ gitdb==4.0.12
879
+ hyperframe==6.1.0
880
+ opentelemetry-semantic-conventions==0.59b0
881
+ wheel==0.46.3
882
+ h2==4.3.0
883
+ google-cloud-audit-log==0.4.0
884
+ tqdm==4.67.3
885
+ httpx==0.28.1
886
+ cloudpathlib==0.23.0
887
+ thinc==8.3.10
888
+ audioread==3.1.0
889
+ fastdownload==0.0.7
890
+ gcsfs==2025.3.0
891
+ nvidia-nvjitlink-cu12==12.8.93
892
+ access==1.1.10.post3
893
+ tornado==6.5.1
894
+ pandocfilters==1.5.1
895
+ fasttransform==0.0.2
896
+ nvidia-curand-cu12==10.3.9.90
897
+ python-multipart==0.0.22
898
+ yellowbrick==1.5
899
+ jupyter_client==7.4.9
900
+ google-generativeai==0.8.6
901
+ blobfile==3.2.0
902
+ importlib_metadata==8.7.1
903
+ tensorboard-data-server==0.7.2
904
+ attrs==25.4.0
905
+ tbb==2022.3.1
906
+ pluggy==1.6.0
907
+ cuda-pathfinder==1.3.5
908
+ rtree==1.4.1
909
+ arrow==1.4.0
910
+ wrapt==2.1.1
911
+ anywidget==0.9.21
912
+ mlxtend==0.23.4
913
+ smmap==5.0.2
914
+ aiohttp==3.13.3
915
+ opentelemetry-exporter-gcp-logging==1.11.0a0
916
+ sortedcontainers==2.4.0
917
+ pyshp==3.0.3
918
+ sklearn-compat==0.1.5
919
+ xxhash==3.6.0
920
+ zstandard==0.25.0
921
+ Mako==1.3.10
922
+ google-cloud-iam==2.21.0
923
+ autograd==1.8.0
924
+ glob2==0.7
925
+ tensorstore==0.1.81
926
+ tensorflow-probability==0.25.0
927
+ colorlover==0.3.0
928
+ ipyfilechooser==0.6.0
929
+ gradio==5.50.0
930
+ cmdstanpy==1.3.0
931
+ dm-tree==0.1.9
932
+ html5lib==1.1
933
+ python-apt==0.0.0
934
+ PyGObject==3.42.1
935
+ blinker==1.4
936
+ jeepney==0.7.1
937
+ six==1.16.0
938
+ oauthlib==3.2.0
939
+ wadllib==1.3.6
940
+ launchpadlib==1.10.16
941
+ dbus-python==1.2.18
942
+ PyJWT==2.3.0
943
+ importlib-metadata==4.6.4
944
+ httplib2==0.20.2
945
+ zipp==1.0.0
946
+ pyparsing==2.4.7
947
+ lazr.restfulclient==0.14.4
948
+ SecretStorage==3.3.1
949
+ distro==1.7.0
950
+ lazr.uri==1.0.6
951
+ more-itertools==8.10.0
952
+ python-apt==2.4.0+ubuntu4.1
953
+ cryptography==3.4.8
954
+ keyring==23.5.0
955
+ Markdown==3.3.6
956
+ Mako==1.1.3
957
+ MarkupSafe==2.0.1
958
+ packaging==24.1
959
+ inflect==7.3.1
960
+ autocommand==2.2.2
961
+ typeguard==4.3.0
962
+ jaraco.text==3.12.1
963
+ importlib_resources==6.4.0
964
+ wheel==0.43.0
965
+ zipp==3.19.2
966
+ platformdirs==4.2.2
967
+ importlib_metadata==8.0.0
968
+ tomli==2.0.1
969
+ jaraco.collections==5.1.0
970
+ more-itertools==10.3.0
971
+ typing_extensions==4.12.2
972
+ backports.tarfile==1.2.0
973
+ jaraco.functools==4.0.1
974
+ jaraco.context==5.3.0
kaggle_output/wandb/run-20260331_095935-cu87492i/files/wandb-metadata.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.6.113+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.12.12",
4
+ "startedAt": "2026-03-31T09:59:35.665971Z",
5
+ "program": "kaggle.ipynb",
6
+ "codePath": "kaggle.ipynb",
7
+ "email": "suvraadeep@gmail.com",
8
+ "root": "/kaggle/working",
9
+ "host": "7a129c53b2b5",
10
+ "executable": "/usr/bin/python3",
11
+ "cpu_count": 2,
12
+ "cpu_count_logical": 4,
13
+ "gpu": "Tesla T4",
14
+ "gpu_count": 2,
15
+ "disk": {
16
+ "/": {
17
+ "total": "8656922775552",
18
+ "used": "7347718606848"
19
+ }
20
+ },
21
+ "memory": {
22
+ "total": "33662472192"
23
+ },
24
+ "gpu_nvidia": [
25
+ {
26
+ "name": "Tesla T4",
27
+ "memoryTotal": "16106127360",
28
+ "cudaCores": 2560,
29
+ "architecture": "Turing",
30
+ "uuid": "GPU-c7ebff42-1f98-33a6-9169-a1e3925c4f52"
31
+ },
32
+ {
33
+ "name": "Tesla T4",
34
+ "memoryTotal": "16106127360",
35
+ "cudaCores": 2560,
36
+ "architecture": "Turing",
37
+ "uuid": "GPU-4f057d35-8a41-ce55-5d7d-60c77ebb7156"
38
+ }
39
+ ],
40
+ "cudaVersion": "13.0",
41
+ "writerId": "8a2fdkg7z7uti6wkzjkth5zruzlcpawp"
42
+ }
kaggle_output/wandb/run-20260331_095935-cu87492i/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":336,"_wandb":{"runtime":336}}