{
"title": "t-SNE Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions on t-Distributed Stochastic Neighbor Embedding (t-SNE), covering basic theory, medium-level conceptual understanding, and hard scenario-based applications.",
"questions": [
{
"id": 1,
"questionText": "What does t-SNE stand for?",
"options": [
"t-Scaled Neighbor Estimation",
"t-Distributed Stochastic Neighbor Embedding",
"Tensor Stochastic Network Embedding",
"Total Stochastic Neural Embedding"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE stands for t-Distributed Stochastic Neighbor Embedding, a technique for dimensionality reduction and visualization of high-dimensional data."
},
{
"id": 2,
"questionText": "What is the primary purpose of t-SNE?",
"options": [
"Normalizing data features",
"Generating synthetic data",
"Reducing dimensionality for visualization",
"Classifying high-dimensional data"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE is primarily used to reduce high-dimensional data into 2D or 3D for visualization while preserving local structure."
},
{
"id": 3,
"questionText": "t-SNE is particularly good at preserving which type of data structure?",
"options": [
"Linear relationships",
"Global distances",
"Local neighborhood structure",
"Class labels"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE focuses on preserving local similarities, meaning points that are close in high-dimensional space remain close in low-dimensional space."
},
{
"id": 4,
"questionText": "What is the usual output dimension for t-SNE visualization?",
"options": [
"1D",
"10D",
"2D or 3D",
"5D"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE is typically used to reduce data to 2D or 3D for easy visualization."
},
{
"id": 5,
"questionText": "Which probability distribution is used in the low-dimensional space of t-SNE?",
"options": [
"Student’s t-distribution",
"Gaussian distribution",
"Binomial distribution",
"Uniform distribution"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE uses a Student’s t-distribution with one degree of freedom in the low-dimensional space to model pairwise similarities and prevent crowding."
},
{
"id": 6,
"questionText": "Which cost function does t-SNE minimize?",
"options": [
"Cross-Entropy",
"Euclidean distance",
"Mean Squared Error",
"Kullback-Leibler divergence"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE minimizes the Kullback-Leibler divergence between high-dimensional and low-dimensional probability distributions."
},
{
"id": 7,
"questionText": "In t-SNE, what is 'perplexity'?",
"options": [
"Effective number of neighbors considered",
"Learning rate for gradient descent",
"A measure of dataset size",
"Number of output dimensions"
],
"correctAnswerIndex": 0,
"explanation": "Perplexity controls how many neighbors influence the calculation of similarities; it acts like a smooth measure of the number of neighbors."
},
{
"id": 8,
"questionText": "t-SNE is best suited for which type of relationships?",
"options": [
"Only categorical",
"Linear",
"Only continuous labels",
"Non-linear"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE captures non-linear relationships that linear methods like PCA may miss."
},
{
"id": 9,
"questionText": "Which step is part of the t-SNE algorithm?",
"options": [
"Clustering data into fixed bins",
"Computing pairwise similarities in high-dimensional space",
"Normalizing labels only",
"Sorting features alphabetically"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE first computes pairwise similarities between all points in the high-dimensional space."
},
{
"id": 10,
"questionText": "t-SNE initialization in low-dimensional space is usually:",
"options": [
"Zero matrix",
"Random",
"Label-based ordering",
"PCA projection"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE typically starts with random placement of points in the low-dimensional space."
},
{
"id": 11,
"questionText": "Which of these is a limitation of t-SNE?",
"options": [
"Requires categorical labels",
"Does not work on numeric data",
"Does not scale well to very large datasets",
"Cannot handle linear relationships"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE can be computationally expensive and memory-intensive for large datasets."
},
{
"id": 12,
"questionText": "Which t-SNE hyperparameter affects convergence speed?",
"options": [
"Learning rate",
"Perplexity",
"KL divergence",
"Number of features"
],
"correctAnswerIndex": 0,
"explanation": "The learning rate determines the step size in gradient descent optimization of the t-SNE cost function."
},
{
"id": 13,
"questionText": "t-SNE is mainly used for:",
"options": [
"Prediction",
"Clustering as a main algorithm",
"Classification",
"Dimensionality reduction for visualization"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE reduces dimensionality to visualize complex high-dimensional data effectively."
},
{
"id": 14,
"questionText": "Which of these statements about t-SNE is correct?",
"options": [
"It only works on 2D input",
"It preserves global distances exactly",
"It produces deterministic results",
"It preserves local neighborhood structure"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE focuses on maintaining local structure; global distances may be distorted."
},
{
"id": 15,
"questionText": "t-SNE reduces crowding in low-dimensional space using:",
"options": [
"Euclidean distance in high dimension only",
"Gaussian kernel in high dimension, Student’s t-distribution in low dimension",
"Uniform distance mapping",
"PCA initialization only"
],
"correctAnswerIndex": 1,
"explanation": "Using t-distribution in low dimension with heavy tails helps spread out points to avoid crowding."
},
{
"id": 16,
"questionText": "t-SNE’s output can vary between runs due to:",
"options": [
"Random initialization",
"Gradient descent step size",
"Data normalization",
"Perplexity only"
],
"correctAnswerIndex": 0,
"explanation": "Random initialization in low-dimensional space can lead to different local minima in optimization."
},
{
"id": 17,
"questionText": "Which of these is true about t-SNE and PCA?",
"options": [
"t-SNE is deterministic like PCA",
"Both capture only linear structures",
"PCA is better for visualization",
"t-SNE captures non-linear structure; PCA is linear"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE can capture complex non-linear relationships, whereas PCA preserves only linear variance."
},
{
"id": 18,
"questionText": "t-SNE is not suitable for:",
"options": [
"Non-linear data",
"Small datasets",
"Large-scale datasets without optimization",
"2D visualization"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE has high computational and memory cost for very large datasets."
},
{
"id": 19,
"questionText": "Which t-SNE hyperparameter influences the balance between local and global structure?",
"options": [
"Learning rate",
"Perplexity",
"Output dimension",
"KL divergence"
],
"correctAnswerIndex": 1,
"explanation": "Perplexity acts as a smooth measure of the number of neighbors, balancing local vs. slightly broader structures."
},
{
"id": 20,
"questionText": "t-SNE is stochastic because:",
"options": [
"It uses KL divergence",
"It uses random initialization and gradient descent",
"It uses linear mapping",
"It uses PCA first"
],
"correctAnswerIndex": 1,
"explanation": "The combination of random initialization and stochastic optimization leads to variability in results."
},
{
"id": 21,
"questionText": "Which of these datasets is most appropriate for t-SNE?",
"options": [
"Low-dimensional 2D data only",
"Empty datasets",
"Categorical data without encoding",
"High-dimensional numeric data for visualization"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE is designed to visualize high-dimensional data by projecting it to 2D or 3D."
},
{
"id": 22,
"questionText": "t-SNE helps in which task indirectly?",
"options": [
"Label encoding",
"Understanding clusters or patterns",
"Making predictions",
"Model regularization"
],
"correctAnswerIndex": 1,
"explanation": "While t-SNE does not perform clustering, it can help visually identify clusters or patterns."
},
{
"id": 23,
"questionText": "Why does t-SNE use Student’s t-distribution in low dimensions?",
"options": [
"To handle the 'crowding problem' by allowing heavy tails",
"To increase perplexity",
"To linearize data",
"To simplify computation"
],
"correctAnswerIndex": 0,
"explanation": "Heavy-tailed t-distribution spreads out points in low-dimensional space, avoiding crowding."
},
{
"id": 24,
"questionText": "t-SNE is sensitive to which of the following?",
"options": [
"Number of labels only",
"Dataset size irrelevant",
"Hyperparameters (perplexity, learning rate) and initialization",
"Output dimension only"
],
"correctAnswerIndex": 2,
"explanation": "Small changes in parameters or random initialization can significantly affect t-SNE results."
},
{
"id": 25,
"questionText": "t-SNE preserves which type of distance?",
"options": [
"Global Euclidean distance",
"Local pairwise similarity",
"Cosine distance",
"Manhattan distance"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE preserves pairwise similarities among neighbors rather than absolute global distances."
},
{
"id": 26,
"questionText": "Which of these is a recommended practice before t-SNE?",
"options": [
"Removing labels",
"Standardizing or normalizing features",
"Shuffling the dataset randomly",
"Discretizing continuous features"
],
"correctAnswerIndex": 1,
"explanation": "Feature scaling ensures no single feature dominates pairwise distance calculations."
},
{
"id": 27,
"questionText": "t-SNE is mainly used in which field?",
"options": [
"Optimization of hyperparameters",
"Data visualization, exploratory data analysis",
"Regression",
"Prediction"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE helps visualize high-dimensional data in 2D or 3D for analysis and pattern detection."
},
{
"id": 28,
"questionText": "t-SNE is different from PCA because:",
"options": [
"It is non-linear and focuses on local similarities",
"It reduces to a single principal component",
"It preserves global linear variance",
"It always gives deterministic results"
],
"correctAnswerIndex": 0,
"explanation": "Unlike PCA, t-SNE focuses on preserving local structure and can capture complex non-linear relationships."
},
{
"id": 29,
"questionText": "Which of these can be used to accelerate t-SNE on large datasets?",
"options": [
"Reduce iterations to 1",
"Increase perplexity to maximum",
"Use raw data without scaling",
"Barnes-Hut approximation or FIt-SNE"
],
"correctAnswerIndex": 3,
"explanation": "Barnes-Hut t-SNE and FIt-SNE optimize computation for larger datasets."
},
{
"id": 30,
"questionText": "t-SNE is primarily a ______ technique.",
"options": [
"Clustering algorithm",
"Regression",
"Classification",
"Visualization and dimensionality reduction"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE is mainly used to reduce dimensionality of data for visualization purposes."
},
{
"id": 31,
"questionText": "t-SNE uses which similarity measure in high-dimensional space?",
"options": [
"Conditional probability based on Gaussian distribution",
"Hamming distance",
"Manhattan distance",
"Cosine similarity"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE converts pairwise distances into conditional probabilities using a Gaussian distribution to represent similarity in high-dimensional space."
},
{
"id": 32,
"questionText": "Scenario: You increase t-SNE perplexity from 5 to 50. Likely effect?",
"options": [
"KL divergence becomes zero",
"Clusters appear tighter and more separated",
"Clusters merge, representing broader neighborhood",
"Visualization fails"
],
"correctAnswerIndex": 2,
"explanation": "Higher perplexity considers more neighbors, leading to a broader view of local structure and sometimes merging of clusters."
},
{
"id": 33,
"questionText": "t-SNE output varies between runs due to:",
"options": [
"Variance scaling",
"Perplexity normalization",
"Gradient descent randomness and initialization",
"Feature selection"
],
"correctAnswerIndex": 2,
"explanation": "Random initialization combined with stochastic gradient descent optimization can lead to different results in different runs."
},
{
"id": 34,
"questionText": "Scenario: You apply t-SNE to 1000-dimensional word embeddings. Best practice?",
"options": [
"Normalize only labels",
"Discard half of the words randomly",
"Optionally perform PCA first to reduce dimensions before t-SNE",
"Apply t-SNE directly without scaling"
],
"correctAnswerIndex": 2,
"explanation": "Using PCA first reduces noise and computation while retaining most variance, improving t-SNE performance on high-dimensional embeddings."
},
{
"id": 35,
"questionText": "t-SNE is sensitive to which hyperparameters?",
"options": [
"Perplexity, learning rate, number of iterations",
"Data type",
"Number of output labels only",
"PCA components only"
],
"correctAnswerIndex": 0,
"explanation": "Perplexity, learning rate, and iterations significantly influence the optimization and visualization outcome."
},
{
"id": 36,
"questionText": "Scenario: t-SNE shows distorted global distances. Reason?",
"options": [
"t-SNE focuses on preserving local structure, not global distances",
"Data not normalized",
"Algorithm failed",
"Number of components is wrong"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE prioritizes local similarity preservation; global distances may be distorted in low-dimensional visualization."
},
{
"id": 37,
"questionText": "t-SNE uses which distribution in low-dimensional space to compute similarities?",
"options": [
"Student’s t-distribution",
"Poisson",
"Uniform",
"Gaussian"
],
"correctAnswerIndex": 0,
"explanation": "A heavy-tailed Student’s t-distribution is used to avoid crowding in low-dimensional embeddings."
},
{
"id": 38,
"questionText": "Scenario: t-SNE applied to small dataset, clusters overlap in 2D. Possible reason?",
"options": [
"Learning rate too small",
"All of the above",
"Data scaled incorrectly",
"Perplexity too high"
],
"correctAnswerIndex": 1,
"explanation": "Perplexity, learning rate, and feature scaling all affect t-SNE output; poor tuning can cause cluster overlap."
},
{
"id": 39,
"questionText": "t-SNE reduces dimensionality mainly for:",
"options": [
"Prediction accuracy",
"Label generation",
"Visualization of high-dimensional patterns",
"Feature elimination"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE helps visualize complex high-dimensional data by reducing it to 2D or 3D while preserving local structure."
},
{
"id": 40,
"questionText": "Scenario: t-SNE shows similar points far apart. Likely cause?",
"options": [
"Random initialization",
"All of the above",
"Insufficient iterations",
"Improper perplexity or learning rate"
],
"correctAnswerIndex": 1,
"explanation": "All these factors can distort local relationships in low-dimensional mapping."
},
{
"id": 41,
"questionText": "Scenario: High-dimensional clusters not visible after t-SNE. Solution?",
"options": [
"Use PCA for pre-reduction",
"Tune perplexity and learning rate",
"All of the above",
"Increase iterations"
],
"correctAnswerIndex": 2,
"explanation": "Proper hyperparameter tuning, PCA pre-reduction, and enough iterations improve cluster separation."
},
{
"id": 42,
"questionText": "t-SNE optimization uses which method?",
"options": [
"Gradient descent",
"Random selection",
"Eigen decomposition only",
"Closed-form solution"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE minimizes KL divergence using iterative gradient descent."
},
{
"id": 43,
"questionText": "Scenario: You use t-SNE on image embeddings, clusters appear inconsistent. Recommendation?",
"options": [
"Reduce iterations",
"Use raw pixels without embeddings",
"Change output dimension to 1D",
"Repeat multiple runs and average or use PCA initialization"
],
"correctAnswerIndex": 3,
"explanation": "Due to randomness, multiple runs or PCA initialization can stabilize t-SNE visualization."
},
{
"id": 44,
"questionText": "Scenario: t-SNE applied after PCA with 50 components. Benefit?",
"options": [
"Generates labels",
"Prevents convergence",
"Distorts local structure",
"Reduces noise and computation"
],
"correctAnswerIndex": 3,
"explanation": "PCA pre-reduction helps t-SNE handle high-dimensional data efficiently while preserving structure."
},
{
"id": 45,
"questionText": "t-SNE is mainly affected by:",
"options": [
"Number of labels",
"Hyperparameters and data scaling",
"Dataset name",
"Feature type only"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE results are sensitive to perplexity, learning rate, iterations, and proper feature scaling."
},
{
"id": 46,
"questionText": "Scenario: t-SNE clusters different classes but distorts distances. Interpretation?",
"options": [
"Data incorrect",
"Local structure preserved; global distances may differ",
"Output dimension wrong",
"Algorithm failed"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE emphasizes local neighbor relations, which can distort large-scale global distances."
},
{
"id": 47,
"questionText": "t-SNE is not ideal for:",
"options": [
"Exploring patterns",
"Small datasets",
"Extremely large datasets without optimization",
"Visualizing embeddings"
],
"correctAnswerIndex": 2,
"explanation": "t-SNE has high computational cost for very large datasets, though optimized versions exist."
},
{
"id": 48,
"questionText": "Scenario: Learning rate too high in t-SNE. Effect?",
"options": [
"All of the above",
"Optimization diverges, poor visualization",
"Slower convergence",
"Better cluster separation"
],
"correctAnswerIndex": 1,
"explanation": "Excessively high learning rate can prevent gradient descent from converging, causing chaotic mapping."
},
{
"id": 49,
"questionText": "Scenario: Low perplexity used on dense dataset. Effect?",
"options": [
"Improves convergence",
"All points overlap",
"Merges clusters",
"Overemphasizes very local structure, clusters may fragment"
],
"correctAnswerIndex": 3,
"explanation": "Low perplexity focuses on few neighbors, possibly fragmenting clusters that are globally coherent."
},
{
"id": 50,
"questionText": "Scenario: t-SNE applied to gene expression data for visualization. Useful because?",
"options": [
"Generates labels",
"Predicts outcomes",
"Reduces features for training",
"Highlights local patterns and clusters of similar samples"
],
"correctAnswerIndex": 3,
"explanation": "t-SNE reveals underlying patterns in high-dimensional gene expression data."
},
{
"id": 51,
"questionText": "Scenario: After applying t-SNE, some clusters appear elongated. Likely cause?",
"options": [
"Perplexity or learning rate not optimal",
"Insufficient iterations",
"Random initialization",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "Cluster shape distortions can result from improper hyperparameters, initialization, or insufficient optimization steps."
},
{
"id": 52,
"questionText": "Scenario: t-SNE shows overlapping clusters for distinct classes. Recommended action?",
"options": [
"Reduce dataset size",
"Increase output dimensions beyond 3",
"Use raw data without scaling",
"Adjust perplexity or learning rate, or try PCA initialization"
],
"correctAnswerIndex": 3,
"explanation": "Hyperparameter tuning and proper initialization help better separate clusters in low-dimensional mapping."
},
{
"id": 53,
"questionText": "t-SNE can be combined with PCA to:",
"options": [
"Replace t-SNE entirely",
"Increase perplexity automatically",
"Reduce noise and dimensionality before t-SNE",
"Generate class labels"
],
"correctAnswerIndex": 2,
"explanation": "Using PCA first reduces high-dimensional noise, improving t-SNE efficiency and visualization quality."
},
{
"id": 54,
"questionText": "Scenario: t-SNE on image embeddings produces different plots on repeated runs. Reason?",
"options": [
"Data scaling issues",
"Random initialization and stochastic gradient descent",
"Perplexity too low",
"Output dimension is too small"
],
"correctAnswerIndex": 1,
"explanation": "Variability is due to random initialization and stochastic optimization inherent to t-SNE."
},
{
"id": 55,
"questionText": "Scenario: t-SNE applied to word embeddings shows tight clusters merging. Likely reason?",
"options": [
"KL divergence minimized",
"Learning rate too low",
"Perplexity too high, considering more neighbors",
"Insufficient iterations"
],
"correctAnswerIndex": 2,
"explanation": "High perplexity broadens the neighborhood, causing close clusters to merge visually."
},
{
"id": 56,
"questionText": "Scenario: Large dataset t-SNE visualization is slow. Solution?",
"options": [
"Reduce perplexity to 1",
"Increase learning rate to max",
"Use Barnes-Hut t-SNE or FIt-SNE approximation",
"Use raw data without normalization"
],
"correctAnswerIndex": 2,
"explanation": "Optimized t-SNE versions like Barnes-Hut or FIt-SNE reduce computation and memory cost for large datasets."
},
{
"id": 57,
"questionText": "Scenario: t-SNE on 100-dimensional embeddings, output 2D. Why might global distances be inaccurate?",
"options": [
"Incorrect PCA initialization",
"t-SNE prioritizes local neighborhood preservation over global distances",
"Random features selected",
"Algorithm failed"
],
"correctAnswerIndex": 1,
"explanation": "t-SNE focuses on preserving local similarities; global distances may be distorted in low-dimensional space."
},
{
"id": 58,
"questionText": "Scenario: t-SNE applied on noisy dataset. Recommended preprocessing?",
"options": [
"Normalize or standardize features, optionally reduce noise with PCA",
"Reduce output dimension to 1D",
"Leave data raw",
"Increase perplexity to max"
],
"correctAnswerIndex": 0,
"explanation": "Scaling and dimensionality reduction improve t-SNE’s ability to capture meaningful structure."
},
{
"id": 59,
"questionText": "Scenario: Two similar clusters in high-dimensional space are far apart in t-SNE plot. Likely reason?",
"options": [
"Data labeling issues",
"Algorithm failure",
"Insufficient perplexity or learning rate tuning",
"Incorrect output dimension"
],
"correctAnswerIndex": 2,
"explanation": "Improper hyperparameters can distort low-dimensional mapping even if local structure is partially preserved."
},
{
"id": 60,
"questionText": "t-SNE can indirectly help in which of these tasks?",
"options": [
"Direct prediction",
"Visual identification of clusters or patterns",
"Label encoding",
"Feature selection for regression"
],
"correctAnswerIndex": 1,
"explanation": "While not a clustering method itself, t-SNE helps visually identify clusters or patterns in high-dimensional data."
},
{
"id": 61,
"questionText": "Scenario: You want to visualize 10,000 points with t-SNE but it is slow. Best practice?",
"options": [
"Increase perplexity to 1000",
"Use optimized versions like FIt-SNE or reduce dimensionality with PCA first",
"Randomly remove half the points",
"Reduce iterations to 10"
],
"correctAnswerIndex": 1,
"explanation": "Optimized algorithms or PCA pre-reduction improve t-SNE performance on large datasets."
},
{
"id": 62,
"questionText": "Scenario: t-SNE applied on text embeddings shows random patterns. Likely cause?",
"options": [
"Low-dimensional output",
"Dataset too large",
"Random initialization and inappropriate hyperparameters",
"Data normalization applied"
],
"correctAnswerIndex": 2,
"explanation": "Random initialization combined with suboptimal perplexity or learning rate can produce unstable visualizations."
},
{
"id": 63,
"questionText": "t-SNE is particularly useful when:",
"options": [
"High-dimensional data visualization is needed",
"Regression is required",
"Prediction is the goal",
"Clustering as a main task"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE is designed for visualization of complex, high-dimensional datasets."
},
{
"id": 64,
"questionText": "Scenario: t-SNE clusters appear overlapping even after PCA pre-reduction. Recommendation?",
"options": [
"Reduce dataset size further",
"Decrease output dimension to 1D",
"Tune perplexity and learning rate, or increase iterations",
"Switch to raw data"
],
"correctAnswerIndex": 2,
"explanation": "Hyperparameter tuning is key to achieving better separation in t-SNE visualizations."
},
{
"id": 65,
"questionText": "Scenario: t-SNE visualization is chaotic. Possible reasons?",
"options": [
"High learning rate, low perplexity, random initialization",
"PCA used for pre-reduction",
"Data normalization applied",
"Output dimension too large"
],
"correctAnswerIndex": 0,
"explanation": "Improper hyperparameters and random initialization can produce poor or chaotic t-SNE plots."
},
{
"id": 66,
"questionText": "Scenario: You reduce embeddings to 2D with t-SNE, but clusters not apparent. Next step?",
"options": [
"Increase dataset size",
"Change output to 1D",
"Use raw data only",
"Adjust perplexity, learning rate, or perform PCA first"
],
"correctAnswerIndex": 3,
"explanation": "Hyperparameter tuning and preprocessing like PCA can help reveal clusters in t-SNE plots."
},
{
"id": 67,
"questionText": "Scenario: t-SNE applied to 300-dimensional image embeddings, some clusters scattered. Likely reason?",
"options": [
"All of the above",
"Learning rate too low",
"High-dimensional noise, consider PCA pre-reduction",
"Perplexity too high"
],
"correctAnswerIndex": 0,
"explanation": "Noise and improper hyperparameters can scatter clusters; preprocessing and tuning are essential."
},
{
"id": 68,
"questionText": "t-SNE preserves local distances by converting pairwise distances to:",
"options": [
"Probabilities using Gaussian in high-d and t-distribution in low-d",
"Manhattan distance only",
"Euclidean distances only",
"Cosine similarity only"
],
"correctAnswerIndex": 0,
"explanation": "Pairwise distances are converted to conditional probabilities in high-d, and Student’s t-distribution in low-d preserves local similarity."
},
{
"id": 69,
"questionText": "Scenario: You run t-SNE multiple times and get slightly different plots. How to improve consistency?",
"options": [
"Increase output dimension to 5D",
"Decrease dataset size",
"Normalize labels only",
"Use PCA initialization and fix random seed"
],
"correctAnswerIndex": 3,
"explanation": "PCA initialization and fixing random seed reduce variability in t-SNE visualization."
},
{
"id": 70,
"questionText": "Scenario: t-SNE produces compressed clusters in center. Likely cause?",
"options": [
"Crowding problem in low-dimensional space",
"Algorithm failure",
"Learning rate too low",
"Perplexity too high"
],
"correctAnswerIndex": 0,
"explanation": "The crowding problem arises because high-dimensional neighborhoods cannot be perfectly represented in low-dimensional space, causing compression."
},
{
"id": 71,
"questionText": "Scenario: You apply t-SNE on 10,000 image embeddings and clusters appear noisy. Which is the best approach?",
"options": [
"Use PCA to reduce dimensions before t-SNE and tune perplexity",
"Increase learning rate to maximum",
"Use raw pixel values directly",
"Reduce output dimensions to 1D"
],
"correctAnswerIndex": 0,
"explanation": "PCA pre-reduction reduces noise and dimensionality, improving t-SNE visualization on large datasets."
},
{
"id": 72,
"questionText": "Scenario: t-SNE on text embeddings shows overlapping topics. Likely cause?",
"options": [
"Perplexity too low or high, or insufficient iterations",
"Data normalization applied",
"Output dimension too high",
"Embedding size too small"
],
"correctAnswerIndex": 0,
"explanation": "Hyperparameter tuning is essential; low/high perplexity or insufficient iterations can cause overlapping clusters."
},
{
"id": 73,
"questionText": "Scenario: Two clusters in high-dimensional space appear merged in t-SNE plot. What can you do?",
"options": [
"Adjust perplexity, learning rate, or use PCA initialization",
"Increase output dimension to 5D",
"Normalize labels",
"Reduce dataset size randomly"
],
"correctAnswerIndex": 0,
"explanation": "Proper hyperparameter tuning and PCA initialization can help separate clusters that appear merged in low-dimensional mapping."
},
{
"id": 74,
"questionText": "Scenario: Running t-SNE on genomic data, you notice small clusters isolated. Reason?",
"options": [
"Perplexity may be low, emphasizing very local neighborhoods",
"High learning rate",
"Output dimension too high",
"Data normalization missing"
],
"correctAnswerIndex": 0,
"explanation": "Low perplexity focuses on very local neighborhoods, potentially isolating small clusters in visualization."
},
{
"id": 75,
"questionText": "Scenario: t-SNE produces different visualizations on repeated runs. How to stabilize?",
"options": [
"Use PCA initialization and fix random seed",
"Reduce dataset size",
"Increase output dimension beyond 3D",
"Use raw data without scaling"
],
"correctAnswerIndex": 0,
"explanation": "PCA initialization and setting a fixed random seed reduce stochastic variation in t-SNE results."
},
{
"id": 76,
"questionText": "Scenario: t-SNE on high-dimensional sensor data shows tight clusters but global distances are distorted. Interpretation?",
"options": [
"Local structure preserved; global distances are not maintained",
"Algorithm failed",
"Data incorrectly scaled",
"Output dimension wrong"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE preserves local pairwise relationships; global distances can appear distorted in 2D/3D visualization."
},
{
"id": 77,
"questionText": "Scenario: Clusters appear fragmented after t-SNE on customer embeddings. Likely reason?",
"options": [
"Perplexity too low",
"Learning rate too high",
"Data normalization missing",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "Low perplexity, high learning rate, or improper feature scaling can fragment clusters in t-SNE plots."
},
{
"id": 78,
"questionText": "Scenario: Applying t-SNE to visualize embeddings after deep learning model training. Best preprocessing?",
"options": [
"Normalize features and optionally use PCA to reduce dimensions",
"Use raw embeddings directly",
"Randomly shuffle dimensions",
"Use first two features only"
],
"correctAnswerIndex": 0,
"explanation": "Normalization and PCA pre-reduction enhance t-SNE visualization quality for deep embeddings."
},
{
"id": 79,
"questionText": "Scenario: Clusters appear compressed in center of t-SNE plot. Likely cause?",
"options": [
"Crowding problem inherent to low-dimensional mapping",
"Algorithm failed",
"Perplexity too high",
"Learning rate too low"
],
"correctAnswerIndex": 0,
"explanation": "Crowding problem occurs because high-dimensional neighborhoods cannot be perfectly represented in low dimensions, causing compression."
},
{
"id": 80,
"questionText": "Scenario: After t-SNE, similar data points are far apart in 2D. Likely reason?",
"options": [
"Hyperparameters not tuned correctly",
"Data normalization failed",
"Output dimension too high",
"Labels missing"
],
"correctAnswerIndex": 0,
"explanation": "Incorrect perplexity, learning rate, or insufficient iterations can cause similar points to appear far apart."
},
{
"id": 81,
"questionText": "Scenario: t-SNE on embeddings shows elongated clusters. Best action?",
"options": [
"Adjust perplexity and learning rate, or increase iterations",
"Reduce output dimension to 1D",
"Use raw embeddings without scaling",
"Remove random points"
],
"correctAnswerIndex": 0,
"explanation": "Cluster elongation often occurs due to suboptimal hyperparameters; tuning and more iterations can improve results."
},
{
"id": 82,
"questionText": "Scenario: Visualizing 50,000 text embeddings with t-SNE is very slow. Solution?",
"options": [
"Use FIt-SNE or Barnes-Hut t-SNE for faster computation",
"Reduce perplexity to 1",
"Use raw text instead of embeddings",
"Decrease output dimensions to 1D"
],
"correctAnswerIndex": 0,
"explanation": "Optimized t-SNE implementations significantly speed up visualization of large datasets."
},
{
"id": 83,
"questionText": "Scenario: t-SNE clusters overlap despite tuning. Next step?",
"options": [
"Consider alternative dimensionality reduction methods like UMAP",
"Reduce output dimensions further",
"Use raw data without embeddings",
"Remove labels"
],
"correctAnswerIndex": 0,
"explanation": "If t-SNE cannot separate clusters even after tuning, UMAP or other DR methods might better preserve structure."
},
{
"id": 84,
"questionText": "Scenario: t-SNE on protein expression data shows some scattered clusters. Likely cause?",
"options": [
"Noise in high-dimensional data",
"Insufficient iterations",
"Suboptimal hyperparameters",
"All of the above"
],
"correctAnswerIndex": 3,
"explanation": "Noise and suboptimal hyperparameters can cause scattered clusters; preprocessing and tuning help visualization."
},
{
"id": 85,
"questionText": "Scenario: You apply t-SNE after PCA to reduce 500D embeddings to 50D. Why?",
"options": [
"Reduce computation and noise while retaining important variance",
"Increase global distance preservation",
"Generate labels automatically",
"Visualize in 3D directly"
],
"correctAnswerIndex": 0,
"explanation": "PCA pre-reduction helps t-SNE efficiently process high-dimensional data while keeping meaningful structure."
},
{
"id": 86,
"questionText": "Scenario: Clusters appear stretched along a single axis. Likely cause?",
"options": [
"Perplexity too high or learning rate too low",
"Algorithm failure",
"Data normalization missing",
"Incorrect output dimension"
],
"correctAnswerIndex": 0,
"explanation": "Improper hyperparameters can cause cluster elongation in low-dimensional embeddings."
},
{
"id": 87,
"questionText": "Scenario: t-SNE applied to 300-dimensional embeddings of customer behavior. Output 2D. What can distort clusters?",
"options": [
"Random initialization, hyperparameters, noisy features",
"Data scaling applied",
"Output dimension too high",
"Label missing"
],
"correctAnswerIndex": 0,
"explanation": "Cluster distortion occurs due to noise, initialization randomness, and hyperparameter settings."
},
{
"id": 88,
"questionText": "Scenario: t-SNE output differs between runs. Best practice to make consistent?",
"options": [
"Fix random seed and use PCA initialization",
"Use raw data directly",
"Reduce output dimension to 1D",
"Change KL divergence formula"
],
"correctAnswerIndex": 0,
"explanation": "Fixed random seed and PCA initialization reduce stochastic variation across runs."
},
{
"id": 89,
"questionText": "Scenario: t-SNE applied to embeddings shows overlapping clusters, perplexity set to 5. Recommendation?",
"options": [
"Increase perplexity to consider more neighbors",
"Reduce learning rate",
"Decrease output dimension to 1D",
"Use raw high-dimensional features"
],
"correctAnswerIndex": 0,
"explanation": "Low perplexity can underrepresent neighborhood structure; increasing perplexity may separate clusters better."
},
{
"id": 90,
"questionText": "Scenario: After t-SNE, similar embeddings appear scattered. Likely hyperparameter issue?",
"options": [
"Learning rate too high or perplexity not optimal",
"Output dimension too high",
"Labels missing",
"Data normalized incorrectly"
],
"correctAnswerIndex": 0,
"explanation": "Improper learning rate or perplexity can scatter similar points, reducing visualization quality."
},
{
"id": 91,
"questionText": "Scenario: t-SNE shows different cluster sizes for similar data. Why?",
"options": [
"Local density differences and crowding problem in low-dimensional space",
"Algorithm failure",
"Incorrect output dimension",
"Data normalization missing"
],
"correctAnswerIndex": 0,
"explanation": "t-SNE preserves local structure; dense regions appear larger and sparse regions smaller due to crowding problem."
},
{
"id": 92,
"questionText": "Scenario: You want faster t-SNE on 100,000 points. Recommendation?",
"options": [
"Use Barnes-Hut or FIt-SNE approximation",
"Reduce output dimension to 1D",
"Use raw data without scaling",
"Decrease perplexity to 1"
],
"correctAnswerIndex": 0,
"explanation": "Optimized t-SNE versions reduce computation and memory for large datasets."
},
{
"id": 93,
"questionText": "Scenario: t-SNE shows elongated clusters. Likely hyperparameter adjustment?",
"options": [
"Adjust perplexity and learning rate, or increase iterations",
"Reduce dataset size",
"Use raw data",
"Remove features randomly"
],
"correctAnswerIndex": 0,
"explanation": "Cluster elongation often occurs due to improper hyperparameters; tuning can improve visualization."
},
{
"id": 94,
"questionText": "Scenario: t-SNE on embedding shows isolated points far from clusters. Likely reason?",
"options": [
"Outliers or low perplexity emphasizing local neighborhoods",
"Algorithm failure",
"Output dimension too high",
"Data normalization missing"
],
"correctAnswerIndex": 0,
"explanation": "Outliers or very low perplexity can cause points to appear isolated in visualization."
},
{
"id": 95,
"questionText": "Scenario: You want t-SNE results reproducible across runs. Steps?",
"options": [
"Fix random seed, use PCA initialization, standardize features",
"Increase output dimensions",
"Reduce dataset size",
"Use raw data"
],
"correctAnswerIndex": 0,
"explanation": "Reproducibility requires controlling randomness and preprocessing consistently."
},
{
"id": 96,
"questionText": "Scenario: t-SNE applied to multi-class embeddings, some classes overlap. Best solution?",
"options": [
"Tune perplexity, learning rate, or try PCA initialization",
"Reduce number of classes",
"Change output to 1D",
"Use raw features without preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "Hyperparameter tuning and PCA initialization often improve cluster separation for multi-class data."
},
{
"id": 97,
"questionText": "Scenario: t-SNE visualization shows tight clusters compressed together. Likely cause?",
"options": [
"Crowding problem and insufficient perplexity",
"Algorithm failure",
"Output dimension too high",
"Data not normalized"
],
"correctAnswerIndex": 0,
"explanation": "Crowding problem causes clusters to compress in low-dimensional space, especially with suboptimal perplexity."
},
{
"id": 98,
"questionText": "Scenario: You apply t-SNE on embeddings with high noise. Recommended step?",
"options": [
"Denoise or reduce dimensionality with PCA before t-SNE",
"Use raw embeddings",
"Reduce output dimension to 1D",
"Increase iterations without preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "Preprocessing helps t-SNE focus on meaningful structure rather than noise."
},
{
"id": 99,
"questionText": "Scenario: t-SNE visualization shows variable cluster shapes between runs. Solution?",
"options": [
"Fix random seed and use PCA initialization",
"Reduce dataset size",
"Change output dimension to 1D",
"Normalize labels"
],
"correctAnswerIndex": 0,
"explanation": "Fixing seed and PCA initialization stabilizes t-SNE output across runs."
},
{
"id": 100,
"questionText": "Scenario: t-SNE applied to 500D embeddings, some clusters overlapping. Recommended approach?",
"options": [
"Try PCA pre-reduction, adjust perplexity and learning rate, increase iterations",
"Reduce output dimension to 1D",
"Use raw features",
"Remove labels"
],
"correctAnswerIndex": 0,
"explanation": "Proper preprocessing and hyperparameter tuning help t-SNE separate overlapping clusters."
}
]
}
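
The questions above repeatedly recommend the same hands-on workflow: standardize features, pre-reduce with PCA, then run t-SNE with a tuned perplexity, PCA initialization, and a fixed random seed. The following is a minimal sketch of that workflow, assuming scikit-learn and matplotlib are available; the digits dataset and the specific parameter values are illustrative assumptions, not part of the quiz data.

# Illustrative sketch only: scale -> PCA pre-reduction -> t-SNE with fixed seed.
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler

X, y = load_digits(return_X_y=True)            # 1797 samples, 64 features (assumed example data)

X_scaled = StandardScaler().fit_transform(X)   # scaling so no single feature dominates distances
X_pca = PCA(n_components=30).fit_transform(X_scaled)  # PCA pre-reduction to cut noise and cost

tsne = TSNE(
    n_components=2,        # 2D output for visualization
    perplexity=30,         # effective number of neighbors (tune per dataset)
    learning_rate=200.0,   # step size of the gradient-descent optimization
    init="pca",            # PCA initialization for more stable, repeatable layouts
    random_state=42,       # fixed seed so repeated runs match
)
X_2d = tsne.fit_transform(X_pca)

plt.scatter(X_2d[:, 0], X_2d[:, 1], c=y, s=5, cmap="tab10")
plt.title("t-SNE of digits (after scaling and PCA pre-reduction)")
plt.show()

scikit-learn's TSNE uses the Barnes-Hut approximation by default, the same speed-up referenced in several of the large-dataset questions above.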