{
  "title": "Naive Bayes Mastery: 100 MCQs",
  "description": "A comprehensive set of 100 multiple-choice questions to test and deepen your understanding of Naive Bayes classifiers, from fundamental concepts to advanced real-world applications and challenges.",
  "questions": [
    {
      "id": 1,
      "questionText": "What is the core assumption of Naive Bayes?",
      "options": [
        "All classes have equal probability",
        "The dataset is balanced",
        "Features are correlated",
        "Features are independent given the class label"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Naive Bayes assumes conditional independence of features given the class, which simplifies probability computation."
    },
    {
      "id": 2,
      "questionText": "Which theorem is Naive Bayes based on?",
      "options": [
        "Markov Theorem",
        "Pythagoras Theorem",
        "Central Limit Theorem",
        "Bayes' Theorem"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Naive Bayes uses Bayes' Theorem to compute posterior probabilities for classification."
    },
    {
      "id": 3,
      "questionText": "In Naive Bayes, what is the 'prior probability'?",
      "options": [
        "Probability of each class before observing features",
        "Probability of features given the class",
        "Probability of misclassification",
        "Conditional probability of test data"
      ],
      "correctAnswerIndex": 0,
      "explanation": "The prior is the initial probability of each class based on the training dataset."
    },
    {
      "id": 4,
      "questionText": "Which type of Naive Bayes is suitable for text data?",
      "options": [
        "Gaussian Naive Bayes",
        "Bernoulli Naive Bayes",
        "Multinomial Naive Bayes",
        "Poisson Naive Bayes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Multinomial NB works well for text features, as it handles word frequencies."
    },
    {
      "id": 5,
      "questionText": "Which Naive Bayes variant is used for binary features?",
      "options": [
        "Gaussian Naive Bayes",
        "Bernoulli Naive Bayes",
        "Poisson Naive Bayes",
        "Multinomial Naive Bayes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Bernoulli NB models binary presence/absence features effectively."
    },
    {
      "id": 6,
      "questionText": "In Gaussian Naive Bayes, features are assumed to follow which distribution?",
      "options": [
        "Uniform distribution",
        "Normal (Gaussian) distribution",
        "Exponential distribution",
        "Poisson distribution"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Gaussian NB models continuous features using a normal distribution."
    },
    {
      "id": 7,
      "questionText": "What is 'likelihood' in Naive Bayes?",
      "options": [
        "Probability of features given the class",
        "Posterior probability",
        "Prior probability",
        "Probability of the class given features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Likelihood is P(features|class), used in Bayes' formula to compute the posterior probability."
    },
    {
      "id": 8,
      "questionText": "Which probability does Naive Bayes calculate to make predictions?",
      "options": [
        "Prior probability only",
        "Posterior probability P(class|features)",
        "Feature probability only",
        "Joint probability of all classes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Naive Bayes computes the posterior probability for each class and selects the class with the highest value."
    },
    {
      "id": 9,
      "questionText": "Why is it called 'Naive' Bayes?",
      "options": [
        "Because it is simple to implement",
        "Because it assumes feature independence",
        "Because it only works on small datasets",
        "Because it ignores class labels"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The method is 'naive' due to its strong assumption that features are independent given the class."
    },
    {
      "id": 10,
      "questionText": "Which metric is commonly used to evaluate Naive Bayes classifiers?",
      "options": [
        "R-squared value",
        "Euclidean distance",
        "Accuracy, Precision, Recall, F1-score",
        "Mean squared error"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Classification metrics like accuracy, precision, recall, and F1-score are used to evaluate Naive Bayes performance."
    },
    {
      "id": 11,
      "questionText": "Scenario: You have continuous features with Gaussian distribution. Which Naive Bayes variant is suitable?",
      "options": [
        "Bernoulli Naive Bayes",
        "Gaussian Naive Bayes",
        "Multinomial Naive Bayes",
        "Poisson Naive Bayes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Gaussian NB models continuous features using the mean and variance of each class."
    },
    {
      "id": 12,
      "questionText": "Scenario: Your dataset has counts of words per document. Which Naive Bayes is ideal?",
      "options": [
        "Bernoulli Naive Bayes",
        "Multinomial Naive Bayes",
        "Gaussian Naive Bayes",
        "Poisson Naive Bayes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Multinomial NB handles discrete count data such as word frequencies."
    },
    {
      "id": 13,
      "questionText": "Scenario: You have binary features indicating presence or absence. Which Naive Bayes type should you use?",
      "options": [
        "Gaussian Naive Bayes",
        "Multinomial Naive Bayes",
        "Bernoulli Naive Bayes",
        "Poisson Naive Bayes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Bernoulli NB is suitable for binary features."
    },
    {
      "id": 14,
      "questionText": "Which problem arises if a feature has zero probability in training data?",
      "options": [
        "Likelihood is unaffected",
        "Accuracy increases",
        "Prior probability changes",
        "Posterior becomes zero, causing prediction failure"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Zero probability leads to a posterior of zero. Laplace smoothing is used to avoid this."
    },
    {
      "id": 15,
      "questionText": "What is Laplace smoothing used for in Naive Bayes?",
      "options": [
        "To normalize features",
        "To handle zero probabilities",
        "To scale continuous features",
        "To reduce dimensionality"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Laplace smoothing adds a small value to feature counts to avoid zero probabilities."
    },
    {
      "id": 16,
      "questionText": "Scenario: You apply Naive Bayes to a spam detection problem. What is the target variable?",
      "options": [
        "Document length",
        "Email class (spam or not spam)",
        "Feature importance",
        "Word frequency"
      ],
      "correctAnswerIndex": 1,
      "explanation": "The target variable is the class label to predict, e.g., spam or ham."
    },
    {
      "id": 17,
      "questionText": "Scenario: In text classification, why do we use log probabilities in Naive Bayes?",
      "options": [
        "To prevent underflow from multiplying many small probabilities",
        "To ignore irrelevant words",
        "To increase accuracy",
        "To normalize features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Log probabilities convert multiplication into addition, avoiding numerical underflow."
    },
    {
      "id": 18,
      "questionText": "Which is a limitation of Naive Bayes?",
      "options": [
        "Cannot handle categorical data",
        "Requires large datasets only",
        "Does not use prior probabilities",
        "Assumes feature independence which is often violated"
      ],
      "correctAnswerIndex": 3,
      "explanation": "The independence assumption may not hold, potentially reducing accuracy."
    },
    {
      "id": 19,
      "questionText": "Which scenario favors Naive Bayes despite its independence assumption?",
      "options": [
        "Complex regression tasks",
        "Time-series prediction",
        "Text classification",
        "Image classification with correlated pixels"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Naive Bayes performs surprisingly well for text classification even when features are not fully independent."
    },
    {
      "id": 20,
      "questionText": "Which term in Bayes' theorem represents evidence?",
      "options": [
        "P(features|class)",
        "P(class)",
        "P(class|features)",
        "P(features)"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Evidence is P(features), used to normalize posterior probabilities."
    },
    {
      "id": 21,
      "questionText": "Scenario: You want to classify news articles. Which preprocessing step helps Naive Bayes?",
      "options": [
        "Ignoring word frequencies",
        "Tokenization and stop-word removal",
        "Adding irrelevant words",
        "Random feature shuffling"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Tokenization and stop-word removal reduce noise and improve feature quality."
    },
    {
      "id": 22,
      "questionText": "Scenario: You notice some features dominate predictions. What can help?",
      "options": [
        "Add Laplace smoothing",
        "Increase k",
        "Remove prior probabilities",
        "Feature scaling or normalization"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Scaling or normalizing features helps prevent any single feature from dominating the posterior computation."
    },
    {
      "id": 23,
      "questionText": "Which is a benefit of Naive Bayes?",
      "options": [
        "Handles missing values automatically",
        "Works only on balanced datasets",
        "Fast to train and predict",
        "Always accurate"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Naive Bayes is computationally efficient and works well with large datasets."
    },
    {
      "id": 24,
      "questionText": "Scenario: Multinomial Naive Bayes is applied to short text documents. What could help?",
      "options": [
        "TF-IDF feature weighting",
        "Use raw counts only",
        "Ignore feature scaling",
        "Shuffle classes randomly"
      ],
      "correctAnswerIndex": 0,
      "explanation": "TF-IDF emphasizes informative words and improves classification accuracy."
    },
    {
      "id": 25,
      "questionText": "Scenario: You apply Gaussian NB but features are not Gaussian. What is likely?",
      "options": [
        "Model may underperform",
        "Features are transformed automatically",
        "Posterior probabilities are exact",
        "Accuracy improves"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Gaussian NB assumes a normal distribution; violations can reduce accuracy."
    },
    {
      "id": 26,
      "questionText": "Which step avoids zero probability for unseen feature values in training?",
      "options": [
        "Laplace smoothing",
        "Feature scaling",
        "Normalization only",
        "Random shuffling"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Laplace smoothing adds a small constant to feature counts."
    },
    {
      "id": 27,
      "questionText": "Scenario: Two classes have very different sample sizes. Which helps?",
      "options": [
        "Setting all priors equal",
        "Random shuffling",
        "Using priors proportional to class frequencies",
        "Ignoring class sizes"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Prior probabilities account for class imbalance in prediction."
    },
    {
      "id": 28,
      "questionText": "Scenario: Features are correlated. What is the effect on Naive Bayes?",
      "options": [
        "Posterior probabilities remain exact",
        "Independence assumption is violated, may reduce accuracy",
        "Model ignores correlation automatically",
        "Accuracy improves"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Naive Bayes assumes independence; correlations can reduce prediction reliability."
    },
    {
      "id": 29,
      "questionText": "Which probability is directly used to choose the class label in Naive Bayes?",
      "options": [
        "Likelihood only",
        "Evidence only",
        "Posterior probability",
        "Prior probability only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "The class with the highest posterior probability is chosen as the prediction."
    },
    {
      "id": 30,
      "questionText": "Scenario: Naive Bayes is applied to multi-class classification. How is prediction done?",
      "options": [
        "Compute posterior for each class and select maximum",
        "Use only the first class",
        "Choose class randomly",
        "Average class probabilities"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Posterior probabilities are computed for each class; the one with the highest value is selected."
    },
    {
      "id": 31,
      "questionText": "Scenario: In email spam detection, which feature representation works best with Multinomial NB?",
      "options": [
        "Random numbers",
        "Raw characters",
        "Word count or TF-IDF vectors",
        "Binary features only"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Multinomial NB handles count-based features like word frequencies effectively."
    },
    {
      "id": 32,
      "questionText": "Which smoothing method prevents zero probability in Naive Bayes?",
      "options": [
        "Z-score normalization",
        "PCA",
        "Laplace smoothing",
        "Min-max scaling"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Laplace smoothing adds a small value to feature counts, avoiding zero probability for unseen features."
    },
    {
      "id": 33,
      "questionText": "Scenario: You have continuous features with a non-Gaussian distribution. Which strategy is suitable?",
      "options": [
        "Use Bernoulli NB",
        "Discretize features or use kernel density estimation",
        "Ignore feature distribution",
        "Use Gaussian NB without changes"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Discretization or kernel density estimation allows NB to handle non-Gaussian continuous data."
    },
    {
      "id": 34,
      "questionText": "Which assumption does Multinomial Naive Bayes make about features?",
      "options": [
        "All features are binary",
        "Features are correlated",
        "Features represent counts/frequencies and are independent",
        "Features are continuous"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Multinomial NB assumes independent counts/frequencies for each feature per class."
    },
    {
      "id": 35,
      "questionText": "Scenario: You apply Naive Bayes to a dataset with missing categorical features. What is an effective approach?",
      "options": [
        "Use Gaussian NB",
        "Replace with random values",
        "Ignore missing data",
        "Treat missing values as a separate category"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Treating missing values as a separate category allows NB to include them in the probability computation."
    },
    {
      "id": 36,
      "questionText": "Scenario: You apply Laplace smoothing with alpha=1. What does alpha control?",
      "options": [
        "Amount added to feature counts to avoid zero probability",
        "Learning rate",
        "Number of neighbors",
        "Feature scaling factor"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Alpha determines the additive smoothing applied to counts to handle unseen feature values."
    },
    {
      "id": 37,
      "questionText": "Scenario: Two features are highly correlated. How does Naive Bayes handle this?",
      "options": [
        "Weights one feature higher",
        "Automatically removes one feature",
        "Merges features into one",
        "Assumes independence; predictions may be biased"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Naive Bayes ignores correlation, which may reduce accuracy in such cases."
    },
    {
      "id": 38,
      "questionText": "Scenario: Using Naive Bayes for sentiment analysis, what preprocessing step helps?",
      "options": [
        "Tokenization, stop-word removal, and stemming",
        "Shuffling words randomly",
        "Ignoring word frequencies",
        "Using raw text only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Text preprocessing ensures features are meaningful and reduces noise."
    },
    {
      "id": 39,
      "questionText": "Scenario: A new category appears in the test data that was unseen in training. What happens?",
      "options": [
        "Class is automatically ignored",
        "Prediction remains correct",
        "Posterior probability becomes zero unless smoothed",
        "Naive Bayes creates a new class"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Without smoothing, unseen feature categories lead to zero probability and failed predictions."
    },
    {
      "id": 40,
      "questionText": "Scenario: Features are categorical with many levels. What helps Naive Bayes performance?",
      "options": [
        "Merging all categories",
        "Feature encoding and smoothing",
        "Ignoring levels",
        "Using Gaussian NB"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Encoding categorical features and smoothing probability estimates improve performance."
    },
    {
      "id": 41,
      "questionText": "Scenario: Naive Bayes applied to multi-class document classification. How is probability computed?",
      "options": [
        "Equal probability for all classes",
        "Posterior probability for each class using prior and likelihood",
        "Only consider the first class",
        "Random selection of class"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Posterior is computed for each class and the highest is selected."
    },
    {
      "id": 42,
      "questionText": "Scenario: You have imbalanced classes. How to adjust Naive Bayes?",
      "options": [
        "Ignore imbalance",
        "Reduce feature counts",
        "Use class priors reflecting class frequencies",
        "Increase smoothing arbitrarily"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Setting class priors helps account for imbalance in predictions."
    },
    {
      "id": 43,
      "questionText": "Scenario: Naive Bayes is applied to continuous and categorical features together. Strategy?",
      "options": [
        "Ignore categorical features",
        "Use only Multinomial NB",
        "Use Gaussian NB for continuous, Multinomial/Bernoulli NB for categorical",
        "Use only Gaussian NB"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Different variants can be combined for mixed-type features."
    },
    {
      "id": 44,
      "questionText": "Scenario: High-dimensional text data causes overfitting. What helps?",
      "options": [
        "Feature selection or dimensionality reduction",
        "Ignore rare words",
        "Increase Laplace alpha",
        "Random shuffling of features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Selecting important features reduces overfitting and improves generalization."
    },
    {
      "id": 45,
      "questionText": "Scenario: Two words always appear together in class A. Effect on Naive Bayes?",
      "options": [
        "Posterior probabilities unaffected",
        "One word ignored",
        "Independence assumption violated; may affect accuracy",
        "Model handles correlation automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Correlated features violate independence, potentially reducing prediction reliability."
    },
    {
      "id": 46,
      "questionText": "Scenario: Naive Bayes is slow with large vocabulary. What helps?",
      "options": [
        "Increase alpha arbitrarily",
        "Use raw counts only",
        "Feature selection or TF-IDF weighting",
        "Shuffle training data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Reducing feature size or weighting reduces computation and improves performance."
    },
    {
      "id": 47,
      "questionText": "Scenario: Text classification with short documents. Which variant works best?",
      "options": [
        "Poisson NB",
        "Bernoulli NB with raw counts",
        "Multinomial NB with TF-IDF or word counts",
        "Gaussian NB"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Short text benefits from count-based Multinomial NB representation."
    },
    {
      "id": 48,
      "questionText": "Scenario: Feature appears in all classes equally. Effect?",
      "options": [
        "Feature dominates prediction",
        "Feature does not help in discriminating classes",
        "Posterior probability increases",
        "Naive Bayes ignores automatically"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Features with equal probability across classes do not contribute to classification."
    },
    {
      "id": 49,
      "questionText": "Scenario: Multinomial NB predicts probabilities 0.7 for class A and 0.3 for class B. Decision?",
      "options": [
        "Choose class B",
        "Average the classes",
        "Random selection",
        "Choose class A"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Naive Bayes selects the class with the highest posterior probability."
    },
    {
      "id": 50,
      "questionText": "Scenario: Features are sparse with many zeros. Which is preferred?",
      "options": [
        "Use raw dense arrays only",
        "Gaussian NB",
        "Multinomial or Bernoulli NB with sparse representation",
        "Ignore zeros"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Sparse-friendly NB variants handle high-dimensional sparse data efficiently."
    },
    {
      "id": 51,
      "questionText": "Scenario: You want to explain predictions. Which Naive Bayes property helps?",
      "options": [
        "Posterior is ignored",
        "Model is a black box",
        "Prior probabilities are hidden",
        "Feature contributions are interpretable via conditional probabilities"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Conditional probabilities indicate which features most influence predictions."
    },
    {
      "id": 52,
      "questionText": "Scenario: Naive Bayes used on reviews. Some rare words exist. Solution?",
      "options": [
        "Normalize counts only",
        "Apply Laplace smoothing",
        "Ignore rare words",
        "Increase k arbitrarily"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Smoothing ensures rare or unseen words do not result in zero probability."
    },
    {
      "id": 53,
      "questionText": "Scenario: Features are normalized to 0-1. Effect on Multinomial NB?",
      "options": [
        "Feature scaling automatically helps",
        "Posterior probabilities unaffected",
        "Accuracy improves",
        "Counts should remain integer; normalization may reduce effectiveness"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Multinomial NB expects count data; normalization may distort probabilities."
    },
    {
      "id": 54,
      "questionText": "Scenario: You have continuous features. Which transformation may help Gaussian NB?",
      "options": [
        "Ignore continuous nature",
        "Binary encode features",
        "Log-transform to reduce skewness",
        "Shuffle values randomly"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Transforming skewed data closer to Gaussian improves model fit."
    },
    {
      "id": 55,
      "questionText": "Scenario: Two classes overlap heavily. Naive Bayes accuracy?",
      "options": [
        "Model ignores overlap",
        "Increases automatically",
        "Reduced due to similar likelihoods",
        "Independent features help perfectly"
      ],
      "correctAnswerIndex": 2,
      "explanation": "When classes overlap, posterior probabilities may be close, leading to misclassification."
    },
    {
      "id": 56,
      "questionText": "Scenario: You want to combine Gaussian and Multinomial features. Strategy?",
      "options": [
        "Use a hybrid NB model handling each type separately",
        "Use Gaussian NB for all",
        "Ignore one type",
        "Convert all to counts"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Hybrid NB allows handling mixed feature types properly."
    },
    {
      "id": 57,
      "questionText": "Scenario: Some features are highly informative, others noisy. Strategy?",
      "options": [
        "Keep all features",
        "Increase alpha",
        "Feature selection to keep informative features",
        "Randomly drop features"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Selecting informative features improves classification and reduces noise influence."
    },
    {
      "id": 58,
      "questionText": "Scenario: Words with high frequency in all classes. Effect?",
      "options": [
        "Dominate prediction positively",
        "Provide little discrimination; may be removed",
        "Model ignores automatically",
        "Posterior probabilities increase"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Common words like 'the' or 'and' do not help differentiate classes."
    },
    {
      "id": 59,
      "questionText": "Scenario: Test data has unseen word features. What is required?",
      "options": [
        "Gaussian NB handles automatically",
        "Remove prior probabilities",
        "Ignore unseen words",
        "Apply Laplace smoothing"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Smoothing ensures unseen words do not produce zero posterior probability."
    },
    {
      "id": 60,
      "questionText": "Scenario: You want probabilities instead of class labels. Naive Bayes output?",
      "options": [
        "Only prior probability",
        "Only class label",
        "Posterior probability for each class",
        "Only likelihood"
      ],
      "correctAnswerIndex": 2,
      "explanation": "NB computes posterior probabilities, which can be used directly or thresholded for classification."
    },
    {
      "id": 61,
      "questionText": "Scenario: Words co-occur frequently within a class. Effect?",
      "options": [
        "Class probabilities unaffected",
        "NB ignores co-occurrence",
        "Independence assumption violated; may reduce accuracy",
        "Prediction improves automatically"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Correlated features violate the NB independence assumption and may bias predictions."
    },
    {
      "id": 62,
      "questionText": "Scenario: Multiclass NB with 10 classes. How to predict?",
      "options": [
        "Compute posterior for each class; choose maximum",
        "Average class probabilities",
        "Random class selection",
        "Use only first class"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Posterior probabilities guide selection of the most probable class."
    },
    {
      "id": 63,
      "questionText": "Scenario: Some features have very low variance. Effect on Gaussian NB?",
      "options": [
        "Model ignores feature automatically",
        "Posterior probability increases",
        "May have little impact; small variance reduces feature importance",
        "Feature dominates prediction"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Low-variance features contribute less to the posterior probability."
    },
    {
      "id": 64,
      "questionText": "Scenario: Sparse categorical features with many unseen values. What helps?",
      "options": [
        "Randomly shuffle features",
        "Ignore rare categories",
        "Smoothing and proper encoding",
        "Use Gaussian NB"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Smoothing and encoding unseen categories allow proper posterior computation."
    },
    {
      "id": 65,
      "questionText": "Scenario: Words occur in multiple classes with similar frequency. Effect?",
      "options": [
        "Model ignores feature automatically",
        "Feature provides little discriminative power",
        "Posterior probabilities increase",
        "Feature dominates prediction"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Non-informative features do not help classification."
    },
    {
      "id": 66,
      "questionText": "Scenario: Features are scaled differently. Effect on Gaussian NB?",
      "options": [
        "Feature scaling ignored",
        "NB unaffected",
        "Scaling impacts Gaussian NB since variance and mean are computed per feature",
        "Posterior remains exact"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Scaling changes the per-feature means and variances that Gaussian NB estimates; consistent preprocessing across training and test data keeps the estimated densities comparable."
    },
    {
      "id": 67,
      "questionText": "Scenario: Class conditional distributions overlap. Accuracy?",
      "options": [
        "NB ignores overlap",
        "Reduced due to similar likelihoods",
        "Increases automatically",
        "Posterior probabilities exact"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Overlap reduces discriminative power, increasing misclassification."
    },
    {
      "id": 68,
      "questionText": "Scenario: Combining NB with feature selection. Effect?",
      "options": [
        "Reduces accuracy",
        "Prior probabilities change",
        "Reduces noise and improves accuracy",
        "Ignored features dominate"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Selecting important features improves model generalization."
    },
    {
      "id": 69,
      "questionText": "Scenario: Naive Bayes for movie genre prediction. Some features missing. Strategy?",
      "options": [
        "Gaussian NB only",
        "Ignore data row",
        "Randomly replace",
        "Treat missing as separate category or impute"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Missing categorical features are handled as a separate category or imputed to compute the posterior."
    },
    {
      "id": 70,
      "questionText": "Scenario: Rare feature appears in all classes equally. Impact?",
      "options": [
        "Feature contributes little to classification",
        "Feature dominates prediction",
        "Posterior probability increases",
        "NB ignores automatically"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Features with equal class frequency have minimal discriminative value."
    },
    {
      "id": 71,
      "questionText": "Scenario: You have highly imbalanced classes. What is a good strategy with Naive Bayes?",
      "options": [
        "Increase Laplace smoothing arbitrarily",
        "Use only majority class",
        "Ignore imbalance",
        "Adjust class priors according to class frequencies"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Adjusting class priors ensures the model accounts for imbalance in predictions."
    },
    {
      "id": 72,
      "questionText": "Scenario: Two features are strongly correlated. What is the effect on Naive Bayes?",
      "options": [
        "NB automatically decorrelates features",
        "Independence assumption violated; may reduce accuracy",
        "Posterior remains exact",
        "Accuracy improves"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Naive Bayes assumes independence. Correlated features may bias predictions."
    },
    {
      "id": 73,
      "questionText": "Scenario: You are predicting rare disease presence. Most patients are healthy. Which is critical?",
      "options": [
        "Class priors and threshold adjustment",
        "Use Gaussian NB for all",
        "Ignore rare class",
        "Increase feature counts"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Rare class predictions require careful handling of priors and decision thresholds."
    },
    {
      "id": 74,
      "questionText": "Scenario: Multi-class text classification with many rare words. Strategy?",
      "options": [
        "Ignore rare words",
        "Use Laplace smoothing and possibly TF-IDF",
        "Use Gaussian NB",
        "Shuffle features"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Smoothing and weighting rare words prevent zero probabilities and improve generalization."
    },
    {
      "id": 75,
      "questionText": "Scenario: Continuous features are skewed. What improves Gaussian NB?",
      "options": [
        "Use Bernoulli NB instead",
        "Ignore skewness",
        "Log or Box-Cox transformation to approximate Gaussian distribution",
        "Normalize 0–1"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Transforming skewed features closer to Gaussian improves model assumptions and accuracy."
    },
    {
      "id": 76,
      "questionText": "Scenario: Text classification. Some words appear in every class equally. Effect?",
      "options": [
        "Dominates predictions",
        "NB ignores automatically",
        "Little discriminative value; may be removed",
        "Posterior increases"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Features with equal class frequency do not help differentiate classes."
    },
    {
      "id": 77,
      "questionText": "Scenario: Combining continuous and categorical features in one dataset. Strategy?",
      "options": [
        "Use hybrid NB (Gaussian for continuous, Multinomial/Bernoulli for categorical)",
        "Convert all to counts",
        "Ignore one type",
        "Use Gaussian NB only"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Hybrid NB allows proper handling of mixed feature types."
    },
    {
      "id": 78,
      "questionText": "Scenario: Naive Bayes applied on streaming data with changing distributions. Strategy?",
      "options": [
        "Use Gaussian NB only",
        "Ignore distribution change",
        "Randomly drop old data",
        "Retrain periodically or use incremental NB"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Incremental learning or periodic retraining adapts to distribution shifts in streaming data."
    },
    {
      "id": 79,
      "questionText": "Scenario: High-dimensional sparse data. What optimization helps?",
      "options": [
        "Sparse representation and feature selection",
        "Shuffle features",
        "Increase Laplace alpha only",
        "Use raw dense matrix"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Sparse storage and feature selection reduce computation and memory use."
    },
    {
      "id": 80,
      "questionText": "Scenario: Words co-occur frequently within a class. Effect?",
      "options": [
        "Violates independence; may bias predictions",
        "Improves accuracy automatically",
        "NB ignores co-occurrence",
        "Posterior unchanged"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Correlated features violate the NB independence assumption; predictions may be biased."
    },
    {
      "id": 81,
      "questionText": "Scenario: Large vocabulary with many zero-count features. How to handle?",
      "options": [
        "Use Laplace smoothing",
        "Remove zeros arbitrarily",
        "Use Gaussian NB",
        "Ignore rare features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Smoothing ensures zero-count features do not yield zero probability."
    },
    {
      "id": 82,
      "questionText": "Scenario: Test data has unseen feature categories. Solution?",
      "options": [
        "Apply Laplace smoothing or treat as unknown category",
        "Randomly assign values",
        "Use Gaussian NB",
        "Ignore unseen categories"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Smoothing allows unseen categories to be incorporated safely."
    },
    {
      "id": 83,
      "questionText": "Scenario: Overlapping class distributions. Naive Bayes accuracy?",
      "options": [
        "NB ignores overlap",
        "Posterior exact",
        "Improves automatically",
        "Reduced due to similar likelihoods"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Overlap reduces discriminative power, increasing misclassification risk."
    },
    {
      "id": 84,
      "questionText": "Scenario: NB output shows posterior probabilities 0.51 vs 0.49. Interpretation?",
      "options": [
        "Prediction is exact",
        "Model is uncertain; threshold adjustment may help",
        "Ignore probabilities",
        "Choose lower class"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Close probabilities indicate uncertainty; thresholds or confidence measures can improve decision-making."
    },
    {
      "id": 85,
      "questionText": "Scenario: Gaussian NB feature has extremely low variance. Effect?",
      "options": [
        "Posterior increases",
        "Feature dominates prediction",
        "Ignored automatically",
        "Feature contributes little; may be ignored"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Low-variance features have minimal impact on posterior probability."
    },
    {
      "id": 86,
      "questionText": "Scenario: Multi-class NB with 15 classes. Prediction method?",
      "options": [
        "Average probabilities",
        "Use only first class",
        "Compute posterior for each class and choose maximum",
        "Choose randomly"
      ],
      "correctAnswerIndex": 2,
      "explanation": "The class with the highest posterior probability is selected."
    },
    {
      "id": 87,
      "questionText": "Scenario: NB applied on mixed numeric and categorical features. Preprocessing?",
      "options": [
        "Ignore one feature type",
        "Normalize all",
        "Gaussian for numeric, Multinomial/Bernoulli for categorical",
        "Convert numeric to binary"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Proper variant selection ensures correct probability calculation."
    },
    {
      "id": 88,
      "questionText": "Scenario: Rare features appear in training but not testing. How to handle?",
      "options": [
        "Apply smoothing to prevent zero probability",
        "Randomly assign probabilities",
        "Ignore rare features",
        "Use Gaussian NB"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Smoothing prevents zero posterior for rare or unseen features."
    },
    {
      "id": 89,
      "questionText": "Scenario: Continuous features heavily skewed. Best approach?",
      "options": [
        "Convert to binary",
        "Use only categorical NB",
        "Log-transform to approximate Gaussian",
        "Ignore skewness"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Transforming skewed continuous features improves Gaussian NB assumptions."
    },
    {
      "id": 90,
      "questionText": "Scenario: Text classification with highly frequent words like 'the'. What should you do?",
      "options": [
        "Increase Laplace alpha",
        "Remove stop words",
        "Keep all words",
        "Randomly shuffle"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Stop-word removal prevents common non-informative words from dominating probabilities."
    },
    {
      "id": 91,
      "questionText": "Scenario: NB used on streaming data with evolving distribution. What helps?",
      "options": [
        "Ignore drift",
        "Use Gaussian NB only",
        "Incremental NB or periodic retraining",
        "Discard old data"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Incremental learning adapts the model to changing feature distributions."
    },
    {
      "id": 92,
      "questionText": "Scenario: Words appear together in many documents (correlation). Effect?",
      "options": [
        "NB ignores correlation",
        "Posterior unaffected",
        "Violates independence; may reduce accuracy",
        "Improves accuracy"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Correlated features violate the conditional independence assumption."
    },
    {
      "id": 93,
      "questionText": "Scenario: Multi-class NB. One class has very few examples. Strategy?",
      "options": [
        "Use priors and smoothing to handle small classes",
        "Duplicate small class",
        "Ignore small class",
        "Remove features"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Small classes require careful handling of priors and smoothing to avoid misclassification."
    },
    {
      "id": 94,
      "questionText": "Scenario: Mixed sparse and dense features. Optimization?",
      "options": [
        "Use sparse representation for sparse features",
        "Convert all to dense",
        "Ignore sparse features",
        "Use only Gaussian NB"
      ],
      "correctAnswerIndex": 0,
      "explanation": "Sparse storage reduces memory and computation costs."
    },
    {
      "id": 95,
      "questionText": "Scenario: NB misclassifies some classes consistently. Probable cause?",
      "options": [
        "Posterior probabilities are exact",
        "Model ignores priors",
        "Independence assumption violated or poor feature selection",
        "Smoothing too high"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Feature correlation or irrelevant features can bias predictions."
    },
    {
      "id": 96,
      "questionText": "Scenario: Gaussian NB on features with large range differences. What is required?",
      "options": [
        "Keep raw values",
        "Apply Laplace smoothing",
        "Random shuffling",
        "Standardize or normalize features"
      ],
      "correctAnswerIndex": 3,
      "explanation": "Feature scaling ensures Gaussian parameters are meaningful."
    },
    {
      "id": 97,
      "questionText": "Scenario: NB for sentiment analysis with short documents. Strategy?",
      "options": [
        "Poisson NB",
        "Use Multinomial NB with TF-IDF or counts",
        "Use Gaussian NB",
        "Bernoulli NB with raw counts"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Short text benefits from count-based representation."
    },
    {
      "id": 98,
      "questionText": "Scenario: Feature occurs frequently in all classes. Effect?",
      "options": [
        "NB ignores automatically",
        "Posterior probability increases",
        "Provides little discriminative power; may be removed",
        "Dominates prediction"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Non-informative features do not help classification."
    },
    {
      "id": 99,
      "questionText": "Scenario: NB applied on multi-lingual text. Strategy?",
      "options": [
        "Ignore language differences",
        "Merge all text blindly",
        "Separate feature sets per language or use language-independent features",
        "Use Gaussian NB"
      ],
      "correctAnswerIndex": 2,
      "explanation": "Language-specific preprocessing ensures meaningful feature extraction."
    },
    {
      "id": 100,
      "questionText": "Scenario: You want to explain which features influenced prediction. Which NB property helps?",
      "options": [
        "Only prior matters",
        "Conditional probabilities show feature contributions",
        "Posterior probabilities ignored",
        "Model is black-box"
      ],
      "correctAnswerIndex": 1,
      "explanation": "Conditional probabilities indicate how each feature contributes to the posterior probability."
    }
  ]
}