{ "title": "Principal Component Analysis (PCA) Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Principal Component Analysis (PCA), from basic concepts to advanced applications in dimensionality reduction and feature engineering.", "questions": [ { "id": 1, "questionText": "What is the main goal of Principal Component Analysis (PCA)?", "options": [ "To classify data into categories", "To generate random features", "To cluster data points", "To reduce the dimensionality of a dataset while retaining most variance" ], "correctAnswerIndex": 3, "explanation": "PCA aims to reduce the number of variables in a dataset while preserving as much variability as possible." }, { "id": 2, "questionText": "In PCA, what does a 'principal component' represent?", "options": [ "The cluster center", "An original feature in the dataset", "A new uncorrelated feature that captures maximum variance", "The mean of all features" ], "correctAnswerIndex": 2, "explanation": "Principal components are linear combinations of original features that are uncorrelated and ordered by the amount of variance they capture." }, { "id": 3, "questionText": "What is the first step before applying PCA?", "options": [ "Standardize or normalize the data", "Train a classifier", "Apply k-means clustering", "Remove outliers only" ], "correctAnswerIndex": 0, "explanation": "Standardization ensures that features with different scales contribute equally to the principal components." }, { "id": 4, "questionText": "Scenario: You have 10 features, but 90% of the variance is captured in 2 components. What can you do?", "options": [ "Keep all 10 features", "Apply dropout", "Add more features", "Reduce the dataset to 2 principal components" ], "correctAnswerIndex": 3, "explanation": "Reducing to 2 principal components retains most of the information while simplifying the dataset." }, { "id": 5, "questionText": "Which mathematical technique is commonly used to compute PCA?", "options": [ "Eigen decomposition of the covariance matrix", "Linear regression", "Gradient descent", "k-Nearest Neighbors" ], "correctAnswerIndex": 0, "explanation": "PCA typically involves computing eigenvectors and eigenvalues of the covariance matrix to find principal components." }, { "id": 6, "questionText": "What property do all principal components have?", "options": [ "They are uncorrelated (orthogonal) to each other", "They sum to zero", "They are dependent on each other", "They always have the same variance" ], "correctAnswerIndex": 0, "explanation": "Principal components are constructed to be orthogonal, ensuring no redundancy in the information they capture." }, { "id": 7, "questionText": "Scenario: PCA applied on a dataset with features in different scales. What happens if you don’t standardize?", "options": [ "The first component captures zero variance", "PCA fails to compute", "Features with larger scale dominate the principal components", "Variance is automatically normalized" ], "correctAnswerIndex": 2, "explanation": "Without standardization, features with larger numeric ranges contribute more to variance, skewing PCA results." 
}, { "id": 8, "questionText": "Which of the following can PCA NOT do?", "options": [ "Remove correlated features", "Reduce dimensionality", "Improve classification accuracy directly", "Visualize high-dimensional data" ], "correctAnswerIndex": 2, "explanation": "PCA is unsupervised and reduces dimensionality; it does not directly improve classification accuracy." }, { "id": 9, "questionText": "Scenario: You apply PCA and find that the first principal component explains 70% variance, the second 20%, and the rest 10%. How many components would you keep to retain 90% variance?", "options": [ "All components", "One component", "Two components", "Three components" ], "correctAnswerIndex": 2, "explanation": "Adding the first two components (70% + 20%) captures 90% of the dataset variance." }, { "id": 10, "questionText": "PCA is an example of which type of learning?", "options": [ "Supervised learning", "Unsupervised learning", "Reinforcement learning", "Semi-supervised learning" ], "correctAnswerIndex": 1, "explanation": "PCA does not use labels; it finds patterns based on feature correlations, making it unsupervised." }, { "id": 11, "questionText": "Scenario: You perform PCA and transform your 5D data to 3D. What property is guaranteed?", "options": [ "Classification accuracy improves", "The 3 components capture the maximum possible variance in 3D", "All data points remain equidistant", "The original features are perfectly preserved" ], "correctAnswerIndex": 1, "explanation": "PCA selects components capturing maximum variance in the reduced dimensions, not necessarily preserving original distances." }, { "id": 12, "questionText": "Which PCA component explains the least variance?", "options": [ "All components explain equal variance", "Any intermediate component", "The last principal component", "The first principal component" ], "correctAnswerIndex": 2, "explanation": "PCA orders components from highest to lowest variance." }, { "id": 13, "questionText": "Scenario: Dataset is already perfectly uncorrelated. What effect does PCA have?", "options": [ "PCA fails to compute", "Principal components are the same as original features", "PCA increases correlation", "PCA reduces variance" ], "correctAnswerIndex": 1, "explanation": "If features are uncorrelated, PCA simply aligns components with original features without reducing dimensionality." }, { "id": 14, "questionText": "Scenario: You want to visualize high-dimensional data in 2D. PCA is applied. What is the risk?", "options": [ "Data labels change", "All variance is retained", "Some variance is lost", "Original features are unchanged" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensions to 2D inevitably loses some information (variance)." }, { "id": 15, "questionText": "Which of the following matrices is symmetric and used in PCA?", "options": [ "Distance matrix", "Covariance matrix", "Adjacency matrix", "Correlation matrix" ], "correctAnswerIndex": 1, "explanation": "The covariance matrix is symmetric and serves as the basis for eigen decomposition in PCA." }, { "id": 16, "questionText": "Scenario: You have highly correlated features. PCA is applied. Effect?", "options": [ "Normalizes variance", "Reduces redundancy by combining correlated features into fewer components", "Removes labels", "Increases correlation" ], "correctAnswerIndex": 1, "explanation": "PCA transforms correlated features into uncorrelated principal components, reducing redundancy." }, { "id": 17, "questionText": "Scenario: You want to reduce noise in data. 
PCA helps by:", "options": [ "Scaling the first component only", "Adding more features", "Increasing learning rate", "Discarding components with low variance" ], "correctAnswerIndex": 3, "explanation": "Low-variance components often represent noise; removing them denoises the dataset." }, { "id": 18, "questionText": "Scenario: Eigenvalues of a PCA covariance matrix are [4, 2, 1]. Which component explains the most variance?", "options": [ "Second component", "All equally", "First component", "Third component" ], "correctAnswerIndex": 2, "explanation": "Eigenvalues correspond to the variance captured; the largest eigenvalue indicates the component with most variance." }, { "id": 19, "questionText": "Scenario: You apply PCA to a dataset and plot explained variance ratio. Purpose?", "options": [ "Scale features", "Compute correlation matrix", "Determine how many components to keep", "Train a classifier" ], "correctAnswerIndex": 2, "explanation": "Explained variance ratio helps decide how many components capture sufficient information." }, { "id": 20, "questionText": "Which PCA component is always orthogonal to the first component?", "options": [ "It may not be orthogonal", "Third component", "Second component", "Last component only" ], "correctAnswerIndex": 2, "explanation": "By definition, each principal component is orthogonal to all previous components." }, { "id": 21, "questionText": "Scenario: After PCA, some components have very small variance. Action?", "options": [ "They must be retained", "Scale them up", "Add noise to them", "They can be discarded" ], "correctAnswerIndex": 3, "explanation": "Components with negligible variance contribute little to data representation and can be removed." }, { "id": 22, "questionText": "Scenario: PCA applied to non-linear data. Limitation?", "options": [ "PCA generates labels", "PCA cannot capture non-linear relationships", "PCA increases variance", "PCA overfits" ], "correctAnswerIndex": 1, "explanation": "Standard PCA is linear and cannot model complex non-linear structures; kernel PCA may be used instead." }, { "id": 23, "questionText": "Scenario: PCA reduces features from 5D to 2D. Data reconstruction is approximate. Why?", "options": [ "Labels change", "Information is lost in discarded components", "PCA adds noise", "Variance is increased" ], "correctAnswerIndex": 1, "explanation": "Dimensionality reduction retains only top components, losing some original information." }, { "id": 24, "questionText": "Which metric is used to measure how much variance is captured by selected components?", "options": [ "Mean squared error", "Correlation coefficient", "Explained variance ratio", "Euclidean distance" ], "correctAnswerIndex": 2, "explanation": "Explained variance ratio shows the proportion of total variance captured by each principal component." }, { "id": 25, "questionText": "Scenario: PCA on standardized data vs. unstandardized data. Difference?", "options": [ "Standardization reduces variance", "Standardized data gives equal weight to all features", "Unstandardized data improves variance capture", "No difference" ], "correctAnswerIndex": 1, "explanation": "Standardization prevents features with large scales from dominating the PCA components." }, { "id": 26, "questionText": "Scenario: Two features are perfectly correlated. 
PCA effect?", "options": [ "Cannot perform PCA", "One component captures the shared variance", "Both components are kept equally", "Variance becomes zero" ], "correctAnswerIndex": 1, "explanation": "PCA combines correlated features into a single principal component." }, { "id": 27, "questionText": "Scenario: You want to visualize 3D data in 2D. PCA helps by:", "options": [ "Adding more dimensions", "Projecting onto top 2 principal components", "Scaling features only", "Generating new labels" ], "correctAnswerIndex": 1, "explanation": "Projection onto top principal components preserves as much variance as possible in reduced dimensions." }, { "id": 28, "questionText": "Scenario: After PCA, you notice negative values in transformed features. Meaning?", "options": [ "PCA failed", "Original data must be negative", "Data must be scaled again", "Principal components can have negative and positive values" ], "correctAnswerIndex": 3, "explanation": "PCA components are linear combinations of original features, allowing both negative and positive values." }, { "id": 29, "questionText": "Scenario: You apply PCA for feature selection. Best approach?", "options": [ "Discard largest components", "Select random components", "Keep all features", "Select top components explaining desired variance" ], "correctAnswerIndex": 3, "explanation": "Top principal components capture most variance and are most informative for feature selection." }, { "id": 30, "questionText": "Scenario: PCA is applied to a dataset with 100 features. First 10 components explain 95% variance. Next step?", "options": [ "Use all 100 features", "Discard the 10 components", "Add more features", "Use 10 components for reduced dataset" ], "correctAnswerIndex": 3, "explanation": "Using the first 10 components retains 95% of information while reducing dimensionality significantly." }, { "id": 31, "questionText": "Scenario: You apply PCA but some features dominate due to large variance. Solution?", "options": [ "Standardize the features", "Remove features with high variance", "Apply k-means clustering", "Reduce dataset size" ], "correctAnswerIndex": 0, "explanation": "Standardizing ensures all features contribute equally, preventing dominance of large-scale features." }, { "id": 32, "questionText": "Scenario: PCA eigenvalues are [5, 2, 0.5, 0.1]. What does the smallest eigenvalue indicate?", "options": [ "Largest variance", "Component is most important", "Least variance along that component", "PCA failed" ], "correctAnswerIndex": 2, "explanation": "Eigenvalues represent variance along principal components; the smallest captures minimal variance." }, { "id": 33, "questionText": "Scenario: Two components have similar eigenvalues. What to do?", "options": [ "Discard one randomly", "Keep both as they explain similar variance", "Combine them manually", "Always choose the first" ], "correctAnswerIndex": 1, "explanation": "Similar eigenvalues indicate both components carry significant information; both should be retained." }, { "id": 34, "questionText": "Scenario: You want to analyze which original features contribute to a principal component. Technique?", "options": [ "Check explained variance ratio only", "Remove low variance features", "Examine component loadings (eigenvectors)", "Normalize the dataset" ], "correctAnswerIndex": 2, "explanation": "Loadings show the weight of each original feature in a principal component, indicating contribution." }, { "id": 35, "questionText": "Scenario: PCA applied on correlation matrix vs covariance matrix. 
Difference?", "options": [ "Correlation matrix standardizes features; covariance matrix uses original scale", "No difference", "Covariance matrix reduces variance", "Correlation matrix increases variance" ], "correctAnswerIndex": 0, "explanation": "Correlation matrix accounts for differing scales by standardizing variables before computing PCA." }, { "id": 36, "questionText": "Scenario: PCA is applied to noisy data. Effect of noise?", "options": [ "Noise appears in low-variance components", "Noise improves variance", "Noise is amplified in all components", "Noise disappears automatically" ], "correctAnswerIndex": 0, "explanation": "High-frequency noise often contributes little variance and is captured in later components, which can be discarded." }, { "id": 37, "questionText": "Scenario: You want to visualize 4D data in 2D using PCA. Which components to use?", "options": [ "Last 2 components", "All 4 components", "Top 2 principal components", "Random 2 features" ], "correctAnswerIndex": 2, "explanation": "Top components retain most variance, providing the best 2D representation of high-dimensional data." }, { "id": 38, "questionText": "Scenario: PCA applied on dataset with zero mean. Why mean-centering?", "options": [ "Normalizes labels", "Increases variance artificially", "Reduces number of features", "Ensures first component captures maximum variance from origin" ], "correctAnswerIndex": 3, "explanation": "Mean-centering removes bias and ensures principal components represent variance relative to the mean." }, { "id": 39, "questionText": "Scenario: PCA applied but first component explains only 20% variance. Interpretation?", "options": [ "First component is irrelevant", "Data variance is spread across many components", "Reduce dataset size", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Low variance in first component indicates no single direction dominates; variance is more uniform across features." }, { "id": 40, "questionText": "Scenario: You perform PCA on features measured in different units. Why important?", "options": [ "To generate labels", "To increase explained variance", "To remove features", "To prevent unit differences from skewing components" ], "correctAnswerIndex": 3, "explanation": "Standardization equalizes units, ensuring PCA reflects intrinsic data structure rather than measurement scale." }, { "id": 41, "questionText": "Scenario: Projecting data back from reduced PCA components to original space. Effect?", "options": [ "Perfect reconstruction always", "Approximate reconstruction with some information loss", "Increase variance", "Remove correlations" ], "correctAnswerIndex": 1, "explanation": "Dimensionality reduction discards minor components, so reconstruction is approximate." }, { "id": 42, "questionText": "Scenario: PCA on sparse high-dimensional data. Which method can help?", "options": [ "Sparse PCA", "Random projection", "Feature scaling", "Standard PCA only" ], "correctAnswerIndex": 0, "explanation": "Sparse PCA introduces sparsity constraints to handle high-dimensional data efficiently." }, { "id": 43, "questionText": "Scenario: Kernel PCA vs standard PCA. Advantage?", "options": [ "Kernel PCA reduces variance", "Kernel PCA does not reduce dimensions", "Kernel PCA captures non-linear patterns", "Standard PCA is better for non-linear data" ], "correctAnswerIndex": 2, "explanation": "Kernel PCA uses kernel functions to capture non-linear relationships, unlike linear PCA." 
}, { "id": 44, "questionText": "Scenario: PCA applied to dataset, but covariance matrix is singular. Cause?", "options": [ "Data standardized", "Number of features > number of samples", "Explained variance too high", "Too few features" ], "correctAnswerIndex": 1, "explanation": "A singular covariance matrix occurs when the data matrix has more features than samples, causing linear dependency." }, { "id": 45, "questionText": "Scenario: After PCA, a component has zero eigenvalue. Meaning?", "options": [ "Data is invalid", "No variance along this component", "PCA failed", "Most important component" ], "correctAnswerIndex": 1, "explanation": "Zero eigenvalue indicates the component captures no variation in the dataset." }, { "id": 46, "questionText": "Scenario: PCA used for feature reduction in regression. Benefit?", "options": [ "Removes labels", "Increases overfitting", "Generates noise", "Reduces multicollinearity and model complexity" ], "correctAnswerIndex": 3, "explanation": "PCA produces uncorrelated features, mitigating multicollinearity and simplifying models." }, { "id": 47, "questionText": "Scenario: PCA shows first 3 components explain 85% variance. Choice?", "options": [ "Keep 3 components for reduced dataset", "Keep only first", "Discard all 3", "Keep all original features" ], "correctAnswerIndex": 0, "explanation": "Top components capturing majority variance are sufficient for dimensionality reduction." }, { "id": 48, "questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?", "options": [ "Important variance discarded in low components", "Features were not standardized", "Too few principal components retained", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features." }, { "id": 49, "questionText": "Scenario: PCA on image dataset. First component represents lighting changes. Meaning?", "options": [ "Reduce dataset size", "Lighting has no effect", "Largest variance is due to lighting, not object content", "PCA failed" ], "correctAnswerIndex": 2, "explanation": "PCA captures directions of maximal variance; if lighting dominates, first component encodes lighting." }, { "id": 50, "questionText": "Scenario: You need interpretability for principal components. Technique?", "options": [ "Examine feature loadings", "Discard low variance components", "Use explained variance only", "Standardize data" ], "correctAnswerIndex": 0, "explanation": "Loadings show contribution of each original feature to principal components, aiding interpretation." }, { "id": 51, "questionText": "Scenario: PCA applied on time-series features. Issue?", "options": [ "Eigenvalues become negative", "Labels are affected", "Variance is increased", "Temporal structure may be lost" ], "correctAnswerIndex": 3, "explanation": "Standard PCA ignores sequence information; temporal relationships may not be preserved." }, { "id": 52, "questionText": "Scenario: Dataset contains categorical variables. PCA requirement?", "options": [ "Convert to numerical via encoding", "Remove them", "No change required", "Use labels directly" ], "correctAnswerIndex": 0, "explanation": "PCA requires numeric input; categorical features must be encoded first (e.g., one-hot encoding)." }, { "id": 53, "questionText": "Scenario: PCA reduces dimensions for clustering. 
Benefit?", "options": [ "Increases data size", "Reduces noise and speeds computation", "Removes clusters", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "PCA simplifies data, removes redundant features, and accelerates clustering algorithms." }, { "id": 54, "questionText": "Scenario: After PCA, components are used in regression. Advantage?", "options": [ "Remove labels", "Avoid multicollinearity and improve stability", "Increases overfitting", "Increase computation" ], "correctAnswerIndex": 1, "explanation": "Principal components are uncorrelated, reducing multicollinearity in regression." }, { "id": 55, "questionText": "Scenario: PCA applied to normalized vs standardized features. Impact?", "options": [ "No impact", "Normalized features fail PCA", "Standardization is crucial for unequal scales", "Variance is reduced" ], "correctAnswerIndex": 2, "explanation": "Features with different scales must be standardized; normalization alone may not equalize contribution." }, { "id": 56, "questionText": "Scenario: First principal component explains 40% variance, second 25%. What % variance is left?", "options": [ "40%", "35%", "25%", "65%" ], "correctAnswerIndex": 1, "explanation": "Remaining variance = 100% - (40% + 25%) = 35%." }, { "id": 57, "questionText": "Scenario: PCA used on finance dataset. First component dominated by one stock. Meaning?", "options": [ "Data should be reduced", "This stock has highest variance in data", "Stock is irrelevant", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Principal components capture directions of maximum variance; one high-variance stock can dominate." }, { "id": 58, "questionText": "Scenario: PCA applied on small dataset. Risk?", "options": [ "PCA fails", "Components become identical", "Overfitting and noisy components", "Variance increases" ], "correctAnswerIndex": 2, "explanation": "Small datasets may produce unstable covariance estimates, leading to noisy components." }, { "id": 59, "questionText": "Scenario: You want to project new data using previously fitted PCA. Method?", "options": [ "Cannot project new data", "Recompute PCA", "Multiply new data by learned component matrix", "Use labels only" ], "correctAnswerIndex": 2, "explanation": "New data can be projected by applying the PCA transformation learned from training data." }, { "id": 60, "questionText": "Scenario: PCA shows negative loadings. Meaning?", "options": [ "Error in computation", "Variance is negative", "Feature removed", "Feature negatively correlates with component" ], "correctAnswerIndex": 3, "explanation": "Negative loadings indicate the original feature moves in opposite direction to the component." }, { "id": 61, "questionText": "Scenario: PCA applied to very high-dimensional genomic data. Challenge?", "options": [ "Variance is too high", "Cannot compute eigenvectors", "Labels cannot be used", "Covariance matrix may be singular or noisy" ], "correctAnswerIndex": 3, "explanation": "High dimensionality with few samples can make the covariance matrix singular and PCA unstable." }, { "id": 62, "questionText": "Scenario: PCA on dataset with outliers. Effect?", "options": [ "PCA removes outliers", "Outliers may distort principal components", "Outliers are ignored automatically", "Variance increases uniformly" ], "correctAnswerIndex": 1, "explanation": "Outliers can dominate variance, affecting directions of principal components." }, { "id": 63, "questionText": "Scenario: PCA applied for compression. 
Target explained variance?", "options": [ "Keep all components", "Keep only first component", "Choose enough components to capture 90–95% variance", "Discard top components" ], "correctAnswerIndex": 2, "explanation": "Selecting components that retain most variance ensures compression without losing significant information." }, { "id": 64, "questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?", "options": [ "Too few principal components retained", "Features were not standardized", "Important variance discarded in low components", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features." }, { "id": 65, "questionText": "Scenario: PCA used for exploratory analysis. Benefit?", "options": [ "Removes labels", "Increases dimensionality", "Generates random features", "Reveals patterns, clusters, and correlations" ], "correctAnswerIndex": 3, "explanation": "PCA simplifies data and highlights underlying patterns or groupings." }, { "id": 66, "questionText": "Scenario: PCA reduces dataset from 50 to 10 features. Effect on storage?", "options": [ "Significant reduction in storage and computation", "Increases storage", "No change", "Removes labels" ], "correctAnswerIndex": 0, "explanation": "Fewer features reduce memory usage and accelerate computations." }, { "id": 67, "questionText": "Scenario: PCA used on correlation matrix. Advantage?", "options": [ "Reduces variance", "Generates labels", "Equalizes feature scales and emphasizes relative relationships", "Removes low-variance features only" ], "correctAnswerIndex": 2, "explanation": "Correlation matrix ensures features with different units or scales contribute proportionally to PCA." }, { "id": 68, "questionText": "Scenario: After PCA, some features have nearly zero loadings across components. Meaning?", "options": [ "Variance is too high", "These features contribute little variance and can be discarded", "They are most important", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Features with negligible loadings do not influence principal components significantly." }, { "id": 69, "questionText": "Scenario: PCA applied to dataset with correlated noise. Effect?", "options": [ "All variance captured by noise", "Noise may form separate low-variance components", "PCA fails", "Noise dominates first component" ], "correctAnswerIndex": 1, "explanation": "Correlated noise often appears in later components with low variance." }, { "id": 70, "questionText": "Scenario: You want to reduce dimensionality without losing much information. PCA strategy?", "options": [ "Use all components", "Keep enough components to capture desired variance (e.g., 90–95%)", "Keep only first component", "Discard components randomly" ], "correctAnswerIndex": 1, "explanation": "Selecting enough principal components ensures dimensionality reduction while retaining most data information." }, { "id": 71, "questionText": "Scenario: Kernel PCA is used instead of standard PCA. Benefit?", "options": [ "Reduces dimensionality linearly", "Captures non-linear relationships in the data", "Removes outliers automatically", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "Kernel PCA uses kernel functions to map data into higher-dimensional space to capture non-linear patterns."
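, "codeExample": "# Editorial illustration (hypothetical optional field, not part of the original schema).\n# Sketch of this question: kernel PCA maps data through a kernel to capture non-linear structure;\n# fit_inverse_transform=True additionally learns an approximate mapping back to input space.\n# Dataset and parameters are illustrative only.\nfrom sklearn.datasets import make_moons\nfrom sklearn.decomposition import KernelPCA\n\nX, _ = make_moons(n_samples=200, noise=0.1, random_state=0)\nkpca = KernelPCA(n_components=2, kernel='rbf', gamma=15, fit_inverse_transform=True)\nZ = kpca.fit_transform(X)           # non-linear embedding\nX_back = kpca.inverse_transform(Z)  # approximate reconstruction in the original space"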
}, { "id": 72, "questionText": "Scenario: Sparse PCA is applied on high-dimensional genomic data. Advantage?", "options": [ "Improves label prediction automatically", "Maximizes variance only", "Generates components with few non-zero loadings for interpretability", "Removes all correlations" ], "correctAnswerIndex": 2, "explanation": "Sparse PCA introduces sparsity constraints, creating components influenced by fewer original features for easier interpretation." }, { "id": 73, "questionText": "Scenario: PCA is applied to compress image data. How to measure quality of compression?", "options": [ "Variance ratio only", "Correlation of first component with pixels", "Reconstruction error (difference between original and reconstructed images)", "Number of components kept" ], "correctAnswerIndex": 2, "explanation": "Reconstruction error quantifies information loss during dimensionality reduction, evaluating compression quality." }, { "id": 74, "questionText": "Scenario: PCA applied to multicollinear financial features. Effect?", "options": [ "Increases collinearity", "Reduces multicollinearity by generating uncorrelated components", "Removes variance", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "Principal components are orthogonal, addressing multicollinearity issues in regression or predictive models." }, { "id": 75, "questionText": "Scenario: PCA applied on large sparse document-term matrix. Challenge?", "options": [ "PCA cannot be applied", "High dimensionality and sparsity require optimized algorithms", "All features dominate equally", "Variance becomes negative" ], "correctAnswerIndex": 1, "explanation": "Sparse high-dimensional data may need techniques like randomized PCA to efficiently compute components." }, { "id": 76, "questionText": "Scenario: PCA applied to time-series data. Concern?", "options": [ "Labels are changed", "Temporal correlations may be ignored", "PCA reduces samples", "Variance increases" ], "correctAnswerIndex": 1, "explanation": "PCA does not account for order in sequences; temporal patterns may be lost." }, { "id": 77, "questionText": "Scenario: PCA reduces 100 features to 5 components. Downstream model performance drops. Likely cause?", "options": [ "All of the above", "Data not standardized", "Too few samples", "Important low-variance features were discarded" ], "correctAnswerIndex": 0, "explanation": "Discarding low-variance features may remove predictive information; other preprocessing issues can also affect performance." }, { "id": 78, "questionText": "Scenario: PCA is used for anomaly detection. Approach?", "options": [ "Discard all components", "Use first component only", "Model normal data with top components and examine reconstruction error", "Apply PCA on labels" ], "correctAnswerIndex": 2, "explanation": "Anomalies often lie in directions of low variance; reconstruction error from PCA can identify unusual data points." }, { "id": 79, "questionText": "Scenario: You apply PCA on a dataset with missing values. Best approach?", "options": [ "PCA fills missing values automatically", "Ignore missing values", "Impute missing values before PCA", "Discard rows with missing values" ], "correctAnswerIndex": 2, "explanation": "PCA requires complete numerical data; missing values must be imputed or handled before applying PCA." }, { "id": 80, "questionText": "Scenario: PCA applied and first component aligns with single feature. 
Interpretation?", "options": [ "This feature dominates variance in the dataset", "Component is irrelevant", "All features are equally important", "PCA failed" ], "correctAnswerIndex": 0, "explanation": "When a single feature dominates variance, the first principal component aligns closely with that feature." }, { "id": 81, "questionText": "Scenario: You perform PCA and observe negative explained variance ratio for a component. Reason?", "options": [ "Data was not mean-centered properly", "Eigenvectors are invalid", "Variance is negative", "PCA cannot run on this data" ], "correctAnswerIndex": 0, "explanation": "Improper centering can lead to incorrect covariance matrix, causing negative variance calculations." }, { "id": 82, "questionText": "Scenario: PCA applied on data with categorical features encoded as one-hot. Concern?", "options": [ "Variance decreases automatically", "Labels are affected", "Components become identical", "High dimensionality may lead to sparse components" ], "correctAnswerIndex": 3, "explanation": "One-hot encoding increases dimensions, producing sparse data; special handling or sparse PCA may be useful." }, { "id": 83, "questionText": "Scenario: After PCA, you plot a biplot. Purpose?", "options": [ "Scale data", "Generate labels", "Visualize principal components and feature contributions", "Remove low-variance components" ], "correctAnswerIndex": 2, "explanation": "Biplots show both projected data points and how original features contribute to components." }, { "id": 84, "questionText": "Scenario: PCA applied on standardized vs non-standardized data with different scales. Outcome?", "options": [ "Standardization ensures fair contribution; non-standardized may bias components", "Non-standardized data improves variance", "No difference", "Variance is reduced in standardized data" ], "correctAnswerIndex": 0, "explanation": "Features with large scales dominate components without standardization, skewing PCA results." }, { "id": 85, "questionText": "Scenario: You want interpretable PCA components. Which approach?", "options": [ "Keep all components", "Sparse PCA or rotation methods like varimax", "Use first component only", "Discard low-variance features" ], "correctAnswerIndex": 1, "explanation": "Sparse PCA and rotation techniques improve interpretability by reducing the number of features contributing to each component." }, { "id": 86, "questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?", "options": [ "Removes noise, reduces computation, highlights clusters", "Increases dimensionality", "Generates labels", "Removes clusters" ], "correctAnswerIndex": 0, "explanation": "Reduced, de-noised features simplify clustering and often improve performance." }, { "id": 87, "questionText": "Scenario: PCA eigenvectors are not unique. Reason?", "options": [ "PCA failed", "Covariance matrix invalid", "Eigenvectors are unique up to sign; direction can flip", "Variance negative" ], "correctAnswerIndex": 2, "explanation": "Eigenvectors can be multiplied by -1 without changing the subspace, so they are not unique in sign." }, { "id": 88, "questionText": "Scenario: PCA applied to data where noise dominates variance. Risk?", "options": [ "Components may represent noise rather than signal", "Variance reduces", "All information preserved", "Components become identical" ], "correctAnswerIndex": 0, "explanation": "High-variance noise can dominate principal components, reducing meaningful representation of data." 
}, { "id": 89, "questionText": "Scenario: PCA applied on dataset with many features having zero variance. Effect?", "options": [ "These features are ignored in covariance computation", "PCA fails", "Variance increases", "Components become identical" ], "correctAnswerIndex": 0, "explanation": "Zero-variance features do not contribute to covariance and do not affect PCA results." }, { "id": 90, "questionText": "Scenario: PCA applied to compress hyperspectral image. Key consideration?", "options": [ "Retain components capturing most spectral variance for accurate reconstruction", "Discard high-variance components", "Keep only first component", "Generate labels automatically" ], "correctAnswerIndex": 0, "explanation": "Hyperspectral data has many correlated channels; top components capture essential information while reducing data size." }, { "id": 91, "questionText": "Scenario: PCA applied and first component is dominated by outlier. Solution?", "options": [ "Keep data as-is", "Remove or mitigate outliers before PCA", "Scale only first feature", "Discard PCA entirely" ], "correctAnswerIndex": 1, "explanation": "Outliers can skew variance and principal directions; handling them ensures meaningful PCA components." }, { "id": 92, "questionText": "Scenario: You need to project streaming data onto PCA components. Requirement?", "options": [ "Project only first sample", "Cannot apply PCA", "Recompute PCA each time", "Use incremental PCA or precomputed components" ], "correctAnswerIndex": 3, "explanation": "Incremental PCA allows efficient projection of new data without recomputing from scratch." }, { "id": 93, "questionText": "Scenario: PCA reduces dimensionality but variance explained is too low. Solution?", "options": [ "Remove first component", "Normalize data again", "Keep more components", "Discard components" ], "correctAnswerIndex": 2, "explanation": "Increasing number of components ensures more variance is retained for downstream tasks." }, { "id": 94, "questionText": "Scenario: PCA applied on correlated features with different scales. Effect if not standardized?", "options": [ "Variance is evenly distributed", "Components are orthogonal", "PCA fails", "Large-scale features dominate components" ], "correctAnswerIndex": 3, "explanation": "Without standardization, features with larger numeric ranges contribute more variance, skewing PCA results." }, { "id": 95, "questionText": "Scenario: PCA used for dimensionality reduction prior to deep learning. Benefit?", "options": [ "Generates labels", "Increases overfitting", "Reduces input size and noise, improving training efficiency", "Removes all variance" ], "correctAnswerIndex": 2, "explanation": "PCA simplifies input features, removing redundant information and reducing computational load." }, { "id": 96, "questionText": "Scenario: PCA applied but some components are highly correlated. Why unusual?", "options": [ "Principal components should be orthogonal; correlation indicates an issue", "Variance is low", "Labels are influencing components", "Expected in standard PCA" ], "correctAnswerIndex": 0, "explanation": "PCA produces orthogonal components; correlated components suggest computation or preprocessing errors." }, { "id": 97, "questionText": "Scenario: PCA applied on multi-class dataset for visualization. 
Approach?", "options": [ "Discard labels", "Keep all features", "Project onto top 2 or 3 components and color points by class", "Use only first component" ], "correctAnswerIndex": 2, "explanation": "Low-dimensional projection allows visualization of class separation while preserving maximal variance." }, { "id": 98, "questionText": "Scenario: PCA applied with top components explaining 80% variance. Downstream task requires 95%. Action?", "options": [ "Normalize data again", "Use only top components", "Include additional components until 95% variance is captured", "Discard low components" ], "correctAnswerIndex": 2, "explanation": "Selecting enough components ensures sufficient information is retained for downstream analysis." }, { "id": 99, "questionText": "Scenario: PCA applied to text embeddings. Challenge?", "options": [ "High dimensionality and sparsity require careful computation", "Variance is negative", "PCA fails automatically", "Components lose meaning entirely" ], "correctAnswerIndex": 0, "explanation": "Text embeddings are often high-dimensional; PCA helps reduce size but may require optimized algorithms." }, { "id": 100, "questionText": "Scenario: PCA used for feature selection. How to choose components?", "options": [ "Choose random components", "Discard high-variance components", "Use only first component", "Select components explaining desired variance threshold (e.g., 90–95%)" ], "correctAnswerIndex": 3, "explanation": "Selecting top components ensures maximal retained information while reducing dimensionality." } ] }