Update utils.py
Browse files
utils.py
CHANGED
|
@@ -68,7 +68,7 @@ def add_red_text(text_to_display):
|
|
| 68 |
# Define pipeline stages
|
| 69 |
pipeline_data = {
|
| 70 |
"Data Collection": {
|
| 71 |
-
"explain_text": "**Data Collection:** Decisions about what data to collect and how."
|
| 72 |
"Data Sources": {
|
| 73 |
"explain_text": "**Data Sources:** What data sources will be used to collect data?",
|
| 74 |
"sub_decisions": ["Collect existing dataset or new sensor data?", "Public datasets or Private datasets?", "Design Web Scraping or use APIs?"]
|
|
@@ -92,7 +92,7 @@ pipeline_data = {
|
|
| 92 |
},
|
| 93 |
|
| 94 |
"Data Processing": {
|
| 95 |
-
"explain_text": "**Data Processing:** Decisions about how to process and prepare the data."
|
| 96 |
"Data Cleaning": {
|
| 97 |
"explain_text": "**Data Cleaning:** How should raw data be cleaned and standardized?",
|
| 98 |
"sub_decisions": ["How to handle missing values?", "How to detect/remove duplicates?", "How to fix formatting errors?"]
|
|
@@ -116,7 +116,7 @@ pipeline_data = {
|
|
| 116 |
},
|
| 117 |
|
| 118 |
"Model Selection": {
|
| 119 |
-
"explain_text": "**Model Selection:** Decisions about which model to train and the hyperparameter choices."
|
| 120 |
"Model Architecture": {
|
| 121 |
"explain_text": "**Model Architecture:** Which type of model is best suited to the problem?",
|
| 122 |
"sub_decisions": ["Linear vs tree-based vs neural networks?", "How interpretable should the model be?", "What are computational constraints?"]
|
|
@@ -140,7 +140,7 @@ pipeline_data = {
|
|
| 140 |
},
|
| 141 |
|
| 142 |
"Model Training": {
|
| 143 |
-
"explain_text": "**Model Training:** Decisions about the training algorithm used."
|
| 144 |
"Data Splitting": {
|
| 145 |
"explain_text": "**Data Splitting:** How should data be divided for training and testing?",
|
| 146 |
"sub_decisions": ["Train-test split ratio?", "Cross-validation vs stratified split?"]
|
|
@@ -164,7 +164,7 @@ pipeline_data = {
|
|
| 164 |
},
|
| 165 |
|
| 166 |
"Model Evaluation": {
|
| 167 |
-
"explain_text": "**Model Evaluation:** Decisions about the evaluation criteria."
|
| 168 |
"Evaluation Metric": {
|
| 169 |
"explain_text": "**Evaluation Metric:** Which metrics best reflect model performance?",
|
| 170 |
"sub_decisions": ["Accuracy vs Precision/Recall/F1?", "How to handle class imbalance?", "Including probabilistic metrics (AUC, log loss)?"]
|
|
|
|
| 68 |
# Define pipeline stages
|
| 69 |
pipeline_data = {
|
| 70 |
"Data Collection": {
|
| 71 |
+
"explain_text": "**Data Collection:** Decisions about what data to collect and how.",
|
| 72 |
"Data Sources": {
|
| 73 |
"explain_text": "**Data Sources:** What data sources will be used to collect data?",
|
| 74 |
"sub_decisions": ["Collect existing dataset or new sensor data?", "Public datasets or Private datasets?", "Design Web Scraping or use APIs?"]
|
|
|
|
| 92 |
},
|
| 93 |
|
| 94 |
"Data Processing": {
|
| 95 |
+
"explain_text": "**Data Processing:** Decisions about how to process and prepare the data.",
|
| 96 |
"Data Cleaning": {
|
| 97 |
"explain_text": "**Data Cleaning:** How should raw data be cleaned and standardized?",
|
| 98 |
"sub_decisions": ["How to handle missing values?", "How to detect/remove duplicates?", "How to fix formatting errors?"]
|
|
|
|
| 116 |
},
|
| 117 |
|
| 118 |
"Model Selection": {
|
| 119 |
+
"explain_text": "**Model Selection:** Decisions about which model to train and the hyperparameter choices.",
|
| 120 |
"Model Architecture": {
|
| 121 |
"explain_text": "**Model Architecture:** Which type of model is best suited to the problem?",
|
| 122 |
"sub_decisions": ["Linear vs tree-based vs neural networks?", "How interpretable should the model be?", "What are computational constraints?"]
|
|
|
|
| 140 |
},
|
| 141 |
|
| 142 |
"Model Training": {
|
| 143 |
+
"explain_text": "**Model Training:** Decisions about the training algorithm used.",
|
| 144 |
"Data Splitting": {
|
| 145 |
"explain_text": "**Data Splitting:** How should data be divided for training and testing?",
|
| 146 |
"sub_decisions": ["Train-test split ratio?", "Cross-validation vs stratified split?"]
|
|
|
|
| 164 |
},
|
| 165 |
|
| 166 |
"Model Evaluation": {
|
| 167 |
+
"explain_text": "**Model Evaluation:** Decisions about the evaluation criteria.",
|
| 168 |
"Evaluation Metric": {
|
| 169 |
"explain_text": "**Evaluation Metric:** Which metrics best reflect model performance?",
|
| 170 |
"sub_decisions": ["Accuracy vs Precision/Recall/F1?", "How to handle class imbalance?", "Including probabilistic metrics (AUC, log loss)?"]
|