{ "title": "Principal Component Analysis (PCA) Mastery: 100 MCQs", "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Principal Component Analysis (PCA), from basic concepts to advanced applications in dimensionality reduction and feature engineering.", "questions": [ { "id": 1, "questionText": "What is the main goal of Principal Component Analysis (PCA)?", "options": [ "To classify data into categories", "To generate random features", "To cluster data points", "To reduce the dimensionality of a dataset while retaining most variance" ], "correctAnswerIndex": 3, "explanation": "PCA aims to reduce the number of variables in a dataset while preserving as much variability as possible." }, { "id": 2, "questionText": "In PCA, what does a 'principal component' represent?", "options": [ "The cluster center", "An original feature in the dataset", "A new uncorrelated feature that captures maximum variance", "The mean of all features" ], "correctAnswerIndex": 2, "explanation": "Principal components are linear combinations of original features that are uncorrelated and ordered by the amount of variance they capture." }, { "id": 3, "questionText": "What is the first step before applying PCA?", "options": [ "Standardize or normalize the data", "Train a classifier", "Apply k-means clustering", "Remove outliers only" ], "correctAnswerIndex": 0, "explanation": "Standardization ensures that features with different scales contribute equally to the principal components." }, { "id": 4, "questionText": "Scenario: You have 10 features, but 90% of the variance is captured in 2 components. What can you do?", "options": [ "Keep all 10 features", "Apply dropout", "Add more features", "Reduce the dataset to 2 principal components" ], "correctAnswerIndex": 3, "explanation": "Reducing to 2 principal components retains most of the information while simplifying the dataset." }, { "id": 5, "questionText": "Which mathematical technique is commonly used to compute PCA?", "options": [ "Eigen decomposition of the covariance matrix", "Linear regression", "Gradient descent", "k-Nearest Neighbors" ], "correctAnswerIndex": 0, "explanation": "PCA typically involves computing eigenvectors and eigenvalues of the covariance matrix to find principal components." }, { "id": 6, "questionText": "What property do all principal components have?", "options": [ "They are uncorrelated (orthogonal) to each other", "They sum to zero", "They are dependent on each other", "They always have the same variance" ], "correctAnswerIndex": 0, "explanation": "Principal components are constructed to be orthogonal, ensuring no redundancy in the information they capture." }, { "id": 7, "questionText": "Scenario: PCA applied on a dataset with features in different scales. What happens if you don’t standardize?", "options": [ "The first component captures zero variance", "PCA fails to compute", "Features with larger scale dominate the principal components", "Variance is automatically normalized" ], "correctAnswerIndex": 2, "explanation": "Without standardization, features with larger numeric ranges contribute more to variance, skewing PCA results." 
}, { "id": 8, "questionText": "Which of the following can PCA NOT do?", "options": [ "Remove correlated features", "Reduce dimensionality", "Improve classification accuracy directly", "Visualize high-dimensional data" ], "correctAnswerIndex": 2, "explanation": "PCA is unsupervised and reduces dimensionality; it does not directly improve classification accuracy." }, { "id": 9, "questionText": "Scenario: You apply PCA and find that the first principal component explains 70% variance, the second 20%, and the rest 10%. How many components would you keep to retain 90% variance?", "options": [ "All components", "One component", "Two components", "Three components" ], "correctAnswerIndex": 2, "explanation": "Adding the first two components (70% + 20%) captures 90% of the dataset variance." }, { "id": 10, "questionText": "PCA is an example of which type of learning?", "options": [ "Supervised learning", "Unsupervised learning", "Reinforcement learning", "Semi-supervised learning" ], "correctAnswerIndex": 1, "explanation": "PCA does not use labels; it finds patterns based on feature correlations, making it unsupervised." }, { "id": 11, "questionText": "Scenario: You perform PCA and transform your 5D data to 3D. What property is guaranteed?", "options": [ "Classification accuracy improves", "The 3 components capture the maximum possible variance in 3D", "All data points remain equidistant", "The original features are perfectly preserved" ], "correctAnswerIndex": 1, "explanation": "PCA selects components capturing maximum variance in the reduced dimensions, not necessarily preserving original distances." }, { "id": 12, "questionText": "Which PCA component explains the least variance?", "options": [ "All components explain equal variance", "Any intermediate component", "The last principal component", "The first principal component" ], "correctAnswerIndex": 2, "explanation": "PCA orders components from highest to lowest variance." }, { "id": 13, "questionText": "Scenario: Dataset is already perfectly uncorrelated. What effect does PCA have?", "options": [ "PCA fails to compute", "Principal components are the same as original features", "PCA increases correlation", "PCA reduces variance" ], "correctAnswerIndex": 1, "explanation": "If features are uncorrelated, PCA simply aligns components with original features without reducing dimensionality." }, { "id": 14, "questionText": "Scenario: You want to visualize high-dimensional data in 2D. PCA is applied. What is the risk?", "options": [ "Data labels change", "All variance is retained", "Some variance is lost", "Original features are unchanged" ], "correctAnswerIndex": 2, "explanation": "Reducing dimensions to 2D inevitably loses some information (variance)." }, { "id": 15, "questionText": "Which of the following matrices is symmetric and used in PCA?", "options": [ "Distance matrix", "Covariance matrix", "Adjacency matrix", "Correlation matrix" ], "correctAnswerIndex": 1, "explanation": "The covariance matrix is symmetric and serves as the basis for eigen decomposition in PCA." }, { "id": 16, "questionText": "Scenario: You have highly correlated features. PCA is applied. Effect?", "options": [ "Normalizes variance", "Reduces redundancy by combining correlated features into fewer components", "Removes labels", "Increases correlation" ], "correctAnswerIndex": 1, "explanation": "PCA transforms correlated features into uncorrelated principal components, reducing redundancy." }, { "id": 17, "questionText": "Scenario: You want to reduce noise in data. 
PCA helps by:", "options": [ "Scaling the first component only", "Adding more features", "Increasing learning rate", "Discarding components with low variance" ], "correctAnswerIndex": 3, "explanation": "Low-variance components often represent noise; removing them denoises the dataset." }, { "id": 18, "questionText": "Scenario: Eigenvalues of a PCA covariance matrix are [4, 2, 1]. Which component explains the most variance?", "options": [ "Second component", "All equally", "First component", "Third component" ], "correctAnswerIndex": 2, "explanation": "Eigenvalues correspond to the variance captured; the largest eigenvalue indicates the component with most variance." }, { "id": 19, "questionText": "Scenario: You apply PCA to a dataset and plot explained variance ratio. Purpose?", "options": [ "Scale features", "Compute correlation matrix", "Determine how many components to keep", "Train a classifier" ], "correctAnswerIndex": 2, "explanation": "Explained variance ratio helps decide how many components capture sufficient information." }, { "id": 20, "questionText": "Which PCA component is always orthogonal to the first component?", "options": [ "It may not be orthogonal", "Third component", "Second component", "Last component only" ], "correctAnswerIndex": 2, "explanation": "By definition, each principal component is orthogonal to all previous components." }, { "id": 21, "questionText": "Scenario: After PCA, some components have very small variance. Action?", "options": [ "They must be retained", "Scale them up", "Add noise to them", "They can be discarded" ], "correctAnswerIndex": 3, "explanation": "Components with negligible variance contribute little to data representation and can be removed." }, { "id": 22, "questionText": "Scenario: PCA applied to non-linear data. Limitation?", "options": [ "PCA generates labels", "PCA cannot capture non-linear relationships", "PCA increases variance", "PCA overfits" ], "correctAnswerIndex": 1, "explanation": "Standard PCA is linear and cannot model complex non-linear structures; kernel PCA may be used instead." }, { "id": 23, "questionText": "Scenario: PCA reduces features from 5D to 2D. Data reconstruction is approximate. Why?", "options": [ "Labels change", "Information is lost in discarded components", "PCA adds noise", "Variance is increased" ], "correctAnswerIndex": 1, "explanation": "Dimensionality reduction retains only top components, losing some original information." }, { "id": 24, "questionText": "Which metric is used to measure how much variance is captured by selected components?", "options": [ "Mean squared error", "Correlation coefficient", "Explained variance ratio", "Euclidean distance" ], "correctAnswerIndex": 2, "explanation": "Explained variance ratio shows the proportion of total variance captured by each principal component." }, { "id": 25, "questionText": "Scenario: PCA on standardized data vs. unstandardized data. Difference?", "options": [ "Standardization reduces variance", "Standardized data gives equal weight to all features", "Unstandardized data improves variance capture", "No difference" ], "correctAnswerIndex": 1, "explanation": "Standardization prevents features with large scales from dominating the PCA components." }, { "id": 26, "questionText": "Scenario: Two features are perfectly correlated. 
PCA effect?", "options": [ "Cannot perform PCA", "One component captures the shared variance", "Both components are kept equally", "Variance becomes zero" ], "correctAnswerIndex": 1, "explanation": "PCA combines correlated features into a single principal component." }, { "id": 27, "questionText": "Scenario: You want to visualize 3D data in 2D. PCA helps by:", "options": [ "Adding more dimensions", "Projecting onto top 2 principal components", "Scaling features only", "Generating new labels" ], "correctAnswerIndex": 1, "explanation": "Projection onto top principal components preserves as much variance as possible in reduced dimensions." }, { "id": 28, "questionText": "Scenario: After PCA, you notice negative values in transformed features. Meaning?", "options": [ "PCA failed", "Original data must be negative", "Data must be scaled again", "Principal components can have negative and positive values" ], "correctAnswerIndex": 3, "explanation": "PCA components are linear combinations of original features, allowing both negative and positive values." }, { "id": 29, "questionText": "Scenario: You apply PCA for feature selection. Best approach?", "options": [ "Discard largest components", "Select random components", "Keep all features", "Select top components explaining desired variance" ], "correctAnswerIndex": 3, "explanation": "Top principal components capture most variance and are most informative for feature selection." }, { "id": 30, "questionText": "Scenario: PCA is applied to a dataset with 100 features. First 10 components explain 95% variance. Next step?", "options": [ "Use all 100 features", "Discard the 10 components", "Add more features", "Use 10 components for reduced dataset" ], "correctAnswerIndex": 3, "explanation": "Using the first 10 components retains 95% of information while reducing dimensionality significantly." }, { "id": 31, "questionText": "Scenario: You apply PCA but some features dominate due to large variance. Solution?", "options": [ "Standardize the features", "Remove features with high variance", "Apply k-means clustering", "Reduce dataset size" ], "correctAnswerIndex": 0, "explanation": "Standardizing ensures all features contribute equally, preventing dominance of large-scale features." }, { "id": 32, "questionText": "Scenario: PCA eigenvalues are [5, 2, 0.5, 0.1]. What does the smallest eigenvalue indicate?", "options": [ "Largest variance", "Component is most important", "Least variance along that component", "PCA failed" ], "correctAnswerIndex": 2, "explanation": "Eigenvalues represent variance along principal components; the smallest captures minimal variance." }, { "id": 33, "questionText": "Scenario: Two components have similar eigenvalues. What to do?", "options": [ "Discard one randomly", "Keep both as they explain similar variance", "Combine them manually", "Always choose the first" ], "correctAnswerIndex": 1, "explanation": "Similar eigenvalues indicate both components carry significant information; both should be retained." }, { "id": 34, "questionText": "Scenario: You want to analyze which original features contribute to a principal component. Technique?", "options": [ "Check explained variance ratio only", "Remove low variance features", "Examine component loadings (eigenvectors)", "Normalize the dataset" ], "correctAnswerIndex": 2, "explanation": "Loadings show the weight of each original feature in a principal component, indicating contribution." }, { "id": 35, "questionText": "Scenario: PCA applied on correlation matrix vs covariance matrix. 
Difference?", "options": [ "Correlation matrix standardizes features; covariance matrix uses original scale", "No difference", "Covariance matrix reduces variance", "Correlation matrix increases variance" ], "correctAnswerIndex": 0, "explanation": "Correlation matrix accounts for differing scales by standardizing variables before computing PCA." }, { "id": 36, "questionText": "Scenario: PCA is applied to noisy data. Effect of noise?", "options": [ "Noise appears in low-variance components", "Noise improves variance", "Noise is amplified in all components", "Noise disappears automatically" ], "correctAnswerIndex": 0, "explanation": "High-frequency noise often contributes little variance and is captured in later components, which can be discarded." }, { "id": 37, "questionText": "Scenario: You want to visualize 4D data in 2D using PCA. Which components to use?", "options": [ "Last 2 components", "All 4 components", "Top 2 principal components", "Random 2 features" ], "correctAnswerIndex": 2, "explanation": "Top components retain most variance, providing the best 2D representation of high-dimensional data." }, { "id": 38, "questionText": "Scenario: PCA applied on dataset with zero mean. Why mean-centering?", "options": [ "Normalizes labels", "Increases variance artificially", "Reduces number of features", "Ensures first component captures maximum variance from origin" ], "correctAnswerIndex": 3, "explanation": "Mean-centering removes bias and ensures principal components represent variance relative to the mean." }, { "id": 39, "questionText": "Scenario: PCA applied but first component explains only 20% variance. Interpretation?", "options": [ "First component is irrelevant", "Data variance is spread across many components", "Reduce dataset size", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Low variance in first component indicates no single direction dominates; variance is more uniform across features." }, { "id": 40, "questionText": "Scenario: You perform PCA on features measured in different units. Why important?", "options": [ "To generate labels", "To increase explained variance", "To remove features", "To prevent unit differences from skewing components" ], "correctAnswerIndex": 3, "explanation": "Standardization equalizes units, ensuring PCA reflects intrinsic data structure rather than measurement scale." }, { "id": 41, "questionText": "Scenario: Projecting data back from reduced PCA components to original space. Effect?", "options": [ "Perfect reconstruction always", "Approximate reconstruction with some information loss", "Increase variance", "Remove correlations" ], "correctAnswerIndex": 1, "explanation": "Dimensionality reduction discards minor components, so reconstruction is approximate." }, { "id": 42, "questionText": "Scenario: PCA on sparse high-dimensional data. Which method can help?", "options": [ "Sparse PCA", "Random projection", "Feature scaling", "Standard PCA only" ], "correctAnswerIndex": 0, "explanation": "Sparse PCA introduces sparsity constraints to handle high-dimensional data efficiently." }, { "id": 43, "questionText": "Scenario: Kernel PCA vs standard PCA. Advantage?", "options": [ "Kernel PCA reduces variance", "Kernel PCA does not reduce dimensions", "Kernel PCA captures non-linear patterns", "Standard PCA is better for non-linear data" ], "correctAnswerIndex": 2, "explanation": "Kernel PCA uses kernel functions to capture non-linear relationships, unlike linear PCA." 
}, { "id": 44, "questionText": "Scenario: PCA applied to dataset, but covariance matrix is singular. Cause?", "options": [ "Data standardized", "Number of features > number of samples", "Explained variance too high", "Too few features" ], "correctAnswerIndex": 1, "explanation": "A singular covariance matrix occurs when the data matrix has more features than samples, causing linear dependency." }, { "id": 45, "questionText": "Scenario: After PCA, a component has zero eigenvalue. Meaning?", "options": [ "Data is invalid", "No variance along this component", "PCA failed", "Most important component" ], "correctAnswerIndex": 1, "explanation": "Zero eigenvalue indicates the component captures no variation in the dataset." }, { "id": 46, "questionText": "Scenario: PCA used for feature reduction in regression. Benefit?", "options": [ "Removes labels", "Increases overfitting", "Generates noise", "Reduces multicollinearity and model complexity" ], "correctAnswerIndex": 3, "explanation": "PCA produces uncorrelated features, mitigating multicollinearity and simplifying models." }, { "id": 47, "questionText": "Scenario: PCA shows first 3 components explain 85% variance. Choice?", "options": [ "Keep 3 components for reduced dataset", "Keep only first", "Discard all 3", "Keep all original features" ], "correctAnswerIndex": 0, "explanation": "Top components capturing majority variance are sufficient for dimensionality reduction." }, { "id": 48, "questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?", "options": [ "Important variance discarded in low components", "Features were not standardized", "Too few principal components retained", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features." }, { "id": 49, "questionText": "Scenario: PCA on image dataset. First component represents lighting changes. Meaning?", "options": [ "Reduce dataset size", "Lighting has no effect", "Largest variance is due to lighting, not object content", "PCA failed" ], "correctAnswerIndex": 2, "explanation": "PCA captures directions of maximal variance; if lighting dominates, first component encodes lighting." }, { "id": 50, "questionText": "Scenario: You need interpretability for principal components. Technique?", "options": [ "Examine feature loadings", "Discard low variance components", "Use explained variance only", "Standardize data" ], "correctAnswerIndex": 0, "explanation": "Loadings show contribution of each original feature to principal components, aiding interpretation." }, { "id": 51, "questionText": "Scenario: PCA applied on time-series features. Issue?", "options": [ "Eigenvalues become negative", "Labels are affected", "Variance is increased", "Temporal structure may be lost" ], "correctAnswerIndex": 3, "explanation": "Standard PCA ignores sequence information; temporal relationships may not be preserved." }, { "id": 52, "questionText": "Scenario: Dataset contains categorical variables. PCA requirement?", "options": [ "Convert to numerical via encoding", "Remove them", "No change required", "Use labels directly" ], "correctAnswerIndex": 0, "explanation": "PCA requires numeric input; categorical features must be encoded first (e.g., one-hot encoding)." }, { "id": 53, "questionText": "Scenario: PCA reduces dimensions for clustering. 
Benefit?", "options": [ "Increases data size", "Reduces noise and speeds computation", "Removes clusters", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "PCA simplifies data, removes redundant features, and accelerates clustering algorithms." }, { "id": 54, "questionText": "Scenario: After PCA, components are used in regression. Advantage?", "options": [ "Remove labels", "Avoid multicollinearity and improve stability", "Increases overfitting", "Increase computation" ], "correctAnswerIndex": 1, "explanation": "Principal components are uncorrelated, reducing multicollinearity in regression." }, { "id": 55, "questionText": "Scenario: PCA applied to normalized vs standardized features. Impact?", "options": [ "No impact", "Normalized features fail PCA", "Standardization is crucial for unequal scales", "Variance is reduced" ], "correctAnswerIndex": 2, "explanation": "Features with different scales must be standardized; normalization alone may not equalize contribution." }, { "id": 56, "questionText": "Scenario: First principal component explains 40% variance, second 25%. What % variance is left?", "options": [ "40%", "35%", "25%", "65%" ], "correctAnswerIndex": 1, "explanation": "Remaining variance = 100% - (40% + 25%) = 35%." }, { "id": 57, "questionText": "Scenario: PCA used on finance dataset. First component dominated by one stock. Meaning?", "options": [ "Data should be reduced", "This stock has highest variance in data", "Stock is irrelevant", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Principal components capture directions of maximum variance; one high-variance stock can dominate." }, { "id": 58, "questionText": "Scenario: PCA applied on small dataset. Risk?", "options": [ "PCA fails", "Components become identical", "Overfitting and noisy components", "Variance increases" ], "correctAnswerIndex": 2, "explanation": "Small datasets may produce unstable covariance estimates, leading to noisy components." }, { "id": 59, "questionText": "Scenario: You want to project new data using previously fitted PCA. Method?", "options": [ "Cannot project new data", "Recompute PCA", "Multiply new data by learned component matrix", "Use labels only" ], "correctAnswerIndex": 2, "explanation": "New data can be projected by applying the PCA transformation learned from training data." }, { "id": 60, "questionText": "Scenario: PCA shows negative loadings. Meaning?", "options": [ "Error in computation", "Variance is negative", "Feature removed", "Feature negatively correlates with component" ], "correctAnswerIndex": 3, "explanation": "Negative loadings indicate the original feature moves in opposite direction to the component." }, { "id": 61, "questionText": "Scenario: PCA applied to very high-dimensional genomic data. Challenge?", "options": [ "Variance is too high", "Cannot compute eigenvectors", "Labels cannot be used", "Covariance matrix may be singular or noisy" ], "correctAnswerIndex": 3, "explanation": "High dimensionality with few samples can make the covariance matrix singular and PCA unstable." }, { "id": 62, "questionText": "Scenario: PCA on dataset with outliers. Effect?", "options": [ "PCA removes outliers", "Outliers may distort principal components", "Outliers are ignored automatically", "Variance increases uniformly" ], "correctAnswerIndex": 1, "explanation": "Outliers can dominate variance, affecting directions of principal components." }, { "id": 63, "questionText": "Scenario: PCA applied for compression. 
Target explained variance?", "options": [ "Keep all components", "Keep only first component", "Choose enough components to capture 90–95% variance", "Discard top components" ], "correctAnswerIndex": 2, "explanation": "Selecting components that retain most variance ensures compression without losing significant information." }, { "id": 64, "questionText": "Scenario: PCA reduces dataset but downstream classifier performs worse. Likely reason?", "options": [ "Too few principal components retained", "Features were not standardized", "Important variance discarded in low components", "All of the above" ], "correctAnswerIndex": 3, "explanation": "Poor classifier performance can result from lost variance, insufficient components, or unstandardized features." }, { "id": 65, "questionText": "Scenario: PCA used for exploratory analysis. Benefit?", "options": [ "Removes labels", "Increases dimensionality", "Generates random features", "Reveals patterns, clusters, and correlations" ], "correctAnswerIndex": 3, "explanation": "PCA simplifies data and highlights underlying patterns or groupings." }, { "id": 66, "questionText": "Scenario: PCA reduces dataset from 50 to 10 features. Effect on storage?", "options": [ "Significant reduction in storage and computation", "Increases storage", "No change", "Removes labels" ], "correctAnswerIndex": 0, "explanation": "Fewer features reduce memory usage and accelerate computations." }, { "id": 67, "questionText": "Scenario: PCA used on correlation matrix. Advantage?", "options": [ "Reduces variance", "Generates labels", "Equalizes feature scales and emphasizes relative relationships", "Removes low-variance features only" ], "correctAnswerIndex": 2, "explanation": "Correlation matrix ensures features with different units or scales contribute proportionally to PCA." }, { "id": 68, "questionText": "Scenario: After PCA, some features have nearly zero loadings across components. Meaning?", "options": [ "Variance is too high", "These features contribute little variance and can be discarded", "They are most important", "PCA failed" ], "correctAnswerIndex": 1, "explanation": "Features with negligible loadings do not influence principal components significantly." }, { "id": 69, "questionText": "Scenario: PCA applied to dataset with correlated noise. Effect?", "options": [ "All variance captured by noise", "Noise may form separate low-variance components", "PCA fails", "Noise dominates first component" ], "correctAnswerIndex": 1, "explanation": "Correlated noise often appears in later components with low variance." }, { "id": 70, "questionText": "Scenario: You want to reduce dimensionality without losing much information. PCA strategy?", "options": [ "Use all components", "Keep enough components to capture desired variance (e.g., 90–95%)", "Keep only first component", "Discard components randomly" ], "correctAnswerIndex": 1, "explanation": "Selecting enough principal components ensures dimensionality reduction while retaining most data information." }, { "id": 71, "questionText": "Scenario: Kernel PCA is used instead of standard PCA. Benefit?", "options": [ "Reduces dimensionality linearly", "Captures non-linear relationships in the data", "Removes outliers automatically", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "Kernel PCA uses kernel functions to map data into higher-dimensional space to capture non-linear patterns."
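, "codeExample": "# Editorial illustration (hypothetical optional field, not part of the original schema).\n# Sketch of this question: kernel PCA maps data through a kernel to capture non-linear structure;\n# fit_inverse_transform=True additionally learns an approximate mapping back to input space.\n# Dataset and parameters are illustrative only.\nfrom sklearn.datasets import make_moons\nfrom sklearn.decomposition import KernelPCA\n\nX, _ = make_moons(n_samples=200, noise=0.1, random_state=0)\nkpca = KernelPCA(n_components=2, kernel='rbf', gamma=15, fit_inverse_transform=True)\nZ = kpca.fit_transform(X)           # non-linear embedding\nX_back = kpca.inverse_transform(Z)  # approximate reconstruction in the original space"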
}, { "id": 72, "questionText": "Scenario: Sparse PCA is applied on high-dimensional genomic data. Advantage?", "options": [ "Improves label prediction automatically", "Maximizes variance only", "Generates components with few non-zero loadings for interpretability", "Removes all correlations" ], "correctAnswerIndex": 2, "explanation": "Sparse PCA introduces sparsity constraints, creating components influenced by fewer original features for easier interpretation." }, { "id": 73, "questionText": "Scenario: PCA is applied to compress image data. How to measure quality of compression?", "options": [ "Variance ratio only", "Correlation of first component with pixels", "Reconstruction error (difference between original and reconstructed images)", "Number of components kept" ], "correctAnswerIndex": 2, "explanation": "Reconstruction error quantifies information loss during dimensionality reduction, evaluating compression quality." }, { "id": 74, "questionText": "Scenario: PCA applied to multicollinear financial features. Effect?", "options": [ "Increases collinearity", "Reduces multicollinearity by generating uncorrelated components", "Removes variance", "Generates labels" ], "correctAnswerIndex": 1, "explanation": "Principal components are orthogonal, addressing multicollinearity issues in regression or predictive models." }, { "id": 75, "questionText": "Scenario: PCA applied on large sparse document-term matrix. Challenge?", "options": [ "PCA cannot be applied", "High dimensionality and sparsity require optimized algorithms", "All features dominate equally", "Variance becomes negative" ], "correctAnswerIndex": 1, "explanation": "Sparse high-dimensional data may need techniques like randomized PCA to efficiently compute components." }, { "id": 76, "questionText": "Scenario: PCA applied to time-series data. Concern?", "options": [ "Labels are changed", "Temporal correlations may be ignored", "PCA reduces samples", "Variance increases" ], "correctAnswerIndex": 1, "explanation": "PCA does not account for order in sequences; temporal patterns may be lost." }, { "id": 77, "questionText": "Scenario: PCA reduces 100 features to 5 components. Downstream model performance drops. Likely cause?", "options": [ "All of the above", "Data not standardized", "Too few samples", "Important low-variance features were discarded" ], "correctAnswerIndex": 0, "explanation": "Discarding low-variance features may remove predictive information; other preprocessing issues can also affect performance." }, { "id": 78, "questionText": "Scenario: PCA is used for anomaly detection. Approach?", "options": [ "Discard all components", "Use first component only", "Model normal data with top components and examine reconstruction error", "Apply PCA on labels" ], "correctAnswerIndex": 2, "explanation": "Anomalies often lie in directions of low variance; reconstruction error from PCA can identify unusual data points." }, { "id": 79, "questionText": "Scenario: You apply PCA on a dataset with missing values. Best approach?", "options": [ "PCA fills missing values automatically", "Ignore missing values", "Impute missing values before PCA", "Discard rows with missing values" ], "correctAnswerIndex": 2, "explanation": "PCA requires complete numerical data; missing values must be imputed or handled before applying PCA." }, { "id": 80, "questionText": "Scenario: PCA applied and first component aligns with single feature. 
Interpretation?", "options": [ "This feature dominates variance in the dataset", "Component is irrelevant", "All features are equally important", "PCA failed" ], "correctAnswerIndex": 0, "explanation": "When a single feature dominates variance, the first principal component aligns closely with that feature." }, { "id": 81, "questionText": "Scenario: You perform PCA and observe negative explained variance ratio for a component. Reason?", "options": [ "Data was not mean-centered properly", "Eigenvectors are invalid", "Variance is negative", "PCA cannot run on this data" ], "correctAnswerIndex": 0, "explanation": "Improper centering can lead to incorrect covariance matrix, causing negative variance calculations." }, { "id": 82, "questionText": "Scenario: PCA applied on data with categorical features encoded as one-hot. Concern?", "options": [ "Variance decreases automatically", "Labels are affected", "Components become identical", "High dimensionality may lead to sparse components" ], "correctAnswerIndex": 3, "explanation": "One-hot encoding increases dimensions, producing sparse data; special handling or sparse PCA may be useful." }, { "id": 83, "questionText": "Scenario: After PCA, you plot a biplot. Purpose?", "options": [ "Scale data", "Generate labels", "Visualize principal components and feature contributions", "Remove low-variance components" ], "correctAnswerIndex": 2, "explanation": "Biplots show both projected data points and how original features contribute to components." }, { "id": 84, "questionText": "Scenario: PCA applied on standardized vs non-standardized data with different scales. Outcome?", "options": [ "Standardization ensures fair contribution; non-standardized may bias components", "Non-standardized data improves variance", "No difference", "Variance is reduced in standardized data" ], "correctAnswerIndex": 0, "explanation": "Features with large scales dominate components without standardization, skewing PCA results." }, { "id": 85, "questionText": "Scenario: You want interpretable PCA components. Which approach?", "options": [ "Keep all components", "Sparse PCA or rotation methods like varimax", "Use first component only", "Discard low-variance features" ], "correctAnswerIndex": 1, "explanation": "Sparse PCA and rotation techniques improve interpretability by reducing the number of features contributing to each component." }, { "id": 86, "questionText": "Scenario: PCA reduces dimensions for clustering. Benefit?", "options": [ "Removes noise, reduces computation, highlights clusters", "Increases dimensionality", "Generates labels", "Removes clusters" ], "correctAnswerIndex": 0, "explanation": "Reduced, de-noised features simplify clustering and often improve performance." }, { "id": 87, "questionText": "Scenario: PCA eigenvectors are not unique. Reason?", "options": [ "PCA failed", "Covariance matrix invalid", "Eigenvectors are unique up to sign; direction can flip", "Variance negative" ], "correctAnswerIndex": 2, "explanation": "Eigenvectors can be multiplied by -1 without changing the subspace, so they are not unique in sign." }, { "id": 88, "questionText": "Scenario: PCA applied to data where noise dominates variance. Risk?", "options": [ "Components may represent noise rather than signal", "Variance reduces", "All information preserved", "Components become identical" ], "correctAnswerIndex": 0, "explanation": "High-variance noise can dominate principal components, reducing meaningful representation of data." 
}, { "id": 89, "questionText": "Scenario: PCA applied on dataset with many features having zero variance. Effect?", "options": [ "These features are ignored in covariance computation", "PCA fails", "Variance increases", "Components become identical" ], "correctAnswerIndex": 0, "explanation": "Zero-variance features do not contribute to covariance and do not affect PCA results." }, { "id": 90, "questionText": "Scenario: PCA applied to compress hyperspectral image. Key consideration?", "options": [ "Retain components capturing most spectral variance for accurate reconstruction", "Discard high-variance components", "Keep only first component", "Generate labels automatically" ], "correctAnswerIndex": 0, "explanation": "Hyperspectral data has many correlated channels; top components capture essential information while reducing data size." }, { "id": 91, "questionText": "Scenario: PCA applied and first component is dominated by outlier. Solution?", "options": [ "Keep data as-is", "Remove or mitigate outliers before PCA", "Scale only first feature", "Discard PCA entirely" ], "correctAnswerIndex": 1, "explanation": "Outliers can skew variance and principal directions; handling them ensures meaningful PCA components." }, { "id": 92, "questionText": "Scenario: You need to project streaming data onto PCA components. Requirement?", "options": [ "Project only first sample", "Cannot apply PCA", "Recompute PCA each time", "Use incremental PCA or precomputed components" ], "correctAnswerIndex": 3, "explanation": "Incremental PCA allows efficient projection of new data without recomputing from scratch." }, { "id": 93, "questionText": "Scenario: PCA reduces dimensionality but variance explained is too low. Solution?", "options": [ "Remove first component", "Normalize data again", "Keep more components", "Discard components" ], "correctAnswerIndex": 2, "explanation": "Increasing number of components ensures more variance is retained for downstream tasks." }, { "id": 94, "questionText": "Scenario: PCA applied on correlated features with different scales. Effect if not standardized?", "options": [ "Variance is evenly distributed", "Components are orthogonal", "PCA fails", "Large-scale features dominate components" ], "correctAnswerIndex": 3, "explanation": "Without standardization, features with larger numeric ranges contribute more variance, skewing PCA results." }, { "id": 95, "questionText": "Scenario: PCA used for dimensionality reduction prior to deep learning. Benefit?", "options": [ "Generates labels", "Increases overfitting", "Reduces input size and noise, improving training efficiency", "Removes all variance" ], "correctAnswerIndex": 2, "explanation": "PCA simplifies input features, removing redundant information and reducing computational load." }, { "id": 96, "questionText": "Scenario: PCA applied but some components are highly correlated. Why unusual?", "options": [ "Principal components should be orthogonal; correlation indicates an issue", "Variance is low", "Labels are influencing components", "Expected in standard PCA" ], "correctAnswerIndex": 0, "explanation": "PCA produces orthogonal components; correlated components suggest computation or preprocessing errors." }, { "id": 97, "questionText": "Scenario: PCA applied on multi-class dataset for visualization. 
Approach?", "options": [ "Discard labels", "Keep all features", "Project onto top 2 or 3 components and color points by class", "Use only first component" ], "correctAnswerIndex": 2, "explanation": "Low-dimensional projection allows visualization of class separation while preserving maximal variance." }, { "id": 98, "questionText": "Scenario: PCA applied with top components explaining 80% variance. Downstream task requires 95%. Action?", "options": [ "Normalize data again", "Use only top components", "Include additional components until 95% variance is captured", "Discard low components" ], "correctAnswerIndex": 2, "explanation": "Selecting enough components ensures sufficient information is retained for downstream analysis." }, { "id": 99, "questionText": "Scenario: PCA applied to text embeddings. Challenge?", "options": [ "High dimensionality and sparsity require careful computation", "Variance is negative", "PCA fails automatically", "Components lose meaning entirely" ], "correctAnswerIndex": 0, "explanation": "Text embeddings are often high-dimensional; PCA helps reduce size but may require optimized algorithms." }, { "id": 100, "questionText": "Scenario: PCA used for feature selection. How to choose components?", "options": [ "Choose random components", "Discard high-variance components", "Use only first component", "Select components explaining desired variance threshold (e.g., 90–95%)" ], "correctAnswerIndex": 3, "explanation": "Selecting top components ensures maximal retained information while reducing dimensionality." } ] }