prakharg24 committed on
Commit
c445956
·
verified ·
1 Parent(s): b2f7cf7

Update utils.py: add the missing trailing comma after each stage-level "explain_text" entry in pipeline_data

Browse files
Files changed (1) hide show
  1. utils.py +5 -5
utils.py CHANGED
@@ -68,7 +68,7 @@ def add_red_text(text_to_display):
68
  # Define pipeline stages
69
  pipeline_data = {
70
  "Data Collection": {
71
- "explain_text": "**Data Collection:** Decisions about what data to collect and how."
72
  "Data Sources": {
73
  "explain_text": "**Data Sources:** What data sources will be used to collect data?",
74
  "sub_decisions": ["Collect existing dataset or new sensor data?", "Public datasets or Private datasets?", "Design Web Scraping or use APIs?"]
@@ -92,7 +92,7 @@ pipeline_data = {
92
  },
93
 
94
  "Data Processing": {
95
- "explain_text": "**Data Processing:** Decisions about how to process and prepare the data."
96
  "Data Cleaning": {
97
  "explain_text": "**Data Cleaning:** How should raw data be cleaned and standardized?",
98
  "sub_decisions": ["How to handle missing values?", "How to detect/remove duplicates?", "How to fix formatting errors?"]
@@ -116,7 +116,7 @@ pipeline_data = {
116
  },
117
 
118
  "Model Selection": {
119
- "explain_text": "**Model Selection:** Decisions about which model to train and the hyperparameter choices."
120
  "Model Architecture": {
121
  "explain_text": "**Model Architecture:** Which type of model is best suited to the problem?",
122
  "sub_decisions": ["Linear vs tree-based vs neural networks?", "How interpretable should the model be?", "What are computational constraints?"]
@@ -140,7 +140,7 @@ pipeline_data = {
140
  },
141
 
142
  "Model Training": {
143
- "explain_text": "**Model Training:** Decisions about the training algorithm used."
144
  "Data Splitting": {
145
  "explain_text": "**Data Splitting:** How should data be divided for training and testing?",
146
  "sub_decisions": ["Train-test split ratio?", "Cross-validation vs stratified split?"]
@@ -164,7 +164,7 @@ pipeline_data = {
164
  },
165
 
166
  "Model Evaluation": {
167
- "explain_text": "**Model Evaluation:** Decisions about the evaluation criteria."
168
  "Evaluation Metric": {
169
  "explain_text": "**Evaluation Metric:** Which metrics best reflect model performance?",
170
  "sub_decisions": ["Accuracy vs Precision/Recall/F1?", "How to handle class imbalance?", "Including probabilistic metrics (AUC, log loss)?"]
 
68
  # Define pipeline stages
69
  pipeline_data = {
70
  "Data Collection": {
71
+ "explain_text": "**Data Collection:** Decisions about what data to collect and how.",
72
  "Data Sources": {
73
  "explain_text": "**Data Sources:** What data sources will be used to collect data?",
74
  "sub_decisions": ["Collect existing dataset or new sensor data?", "Public datasets or Private datasets?", "Design Web Scraping or use APIs?"]
 
92
  },
93
 
94
  "Data Processing": {
95
+ "explain_text": "**Data Processing:** Decisions about how to process and prepare the data.",
96
  "Data Cleaning": {
97
  "explain_text": "**Data Cleaning:** How should raw data be cleaned and standardized?",
98
  "sub_decisions": ["How to handle missing values?", "How to detect/remove duplicates?", "How to fix formatting errors?"]
 
116
  },
117
 
118
  "Model Selection": {
119
+ "explain_text": "**Model Selection:** Decisions about which model to train and the hyperparameter choices.",
120
  "Model Architecture": {
121
  "explain_text": "**Model Architecture:** Which type of model is best suited to the problem?",
122
  "sub_decisions": ["Linear vs tree-based vs neural networks?", "How interpretable should the model be?", "What are computational constraints?"]
 
140
  },
141
 
142
  "Model Training": {
143
+ "explain_text": "**Model Training:** Decisions about the training algorithm used.",
144
  "Data Splitting": {
145
  "explain_text": "**Data Splitting:** How should data be divided for training and testing?",
146
  "sub_decisions": ["Train-test split ratio?", "Cross-validation vs stratified split?"]
 
164
  },
165
 
166
  "Model Evaluation": {
167
+ "explain_text": "**Model Evaluation:** Decisions about the evaluation criteria.",
168
  "Evaluation Metric": {
169
  "explain_text": "**Evaluation Metric:** Which metrics best reflect model performance?",
170
  "sub_decisions": ["Accuracy vs Precision/Recall/F1?", "How to handle class imbalance?", "Including probabilistic metrics (AUC, log loss)?"]