{
"title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.",
"questions": [
{
"id": 1,
"questionText": "What is the core principle behind the KNN algorithm?",
"options": [
"It builds a decision tree and splits data recursively.",
"It constructs a probabilistic model using Bayes theorem.",
"It predicts the label based on the majority class of k nearest data points.",
"It reduces dimensionality using PCA."
],
"correctAnswerIndex": 2,
"explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data."
},
{
"id": 2,
"questionText": "KNN is considered which type of learning algorithm?",
"options": [
"Eager learning",
"Reinforcement learning",
"Unsupervised learning",
"Lazy learning"
],
"correctAnswerIndex": 3,
"explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data."
},
{
"id": 3,
"questionText": "Which distance metric is most commonly used in KNN?",
"options": [
"Cosine Similarity",
"Manhattan Distance",
"Jaccard Distance",
"Euclidean Distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN."
},
{
"id": 4,
"questionText": "KNN is mainly used for:",
"options": [
"Only regression",
"Only clustering",
"Both classification and regression",
"Only classification"
],
"correctAnswerIndex": 2,
"explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)."
},
{
"id": 5,
"questionText": "What happens if k is set to a very large value?",
"options": [
"The model becomes faster and more accurate always.",
"The model becomes overly generalized and biased.",
"The model becomes highly sensitive to noise.",
"The model becomes very overfitted."
],
"correctAnswerIndex": 1,
"explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias."
},
{
"id": 6,
"questionText": "In KNN, what does the parameter 'k' represent?",
"options": [
"Number of features in the dataset",
"Depth of the tree used internally",
"Number of nearest neighbors considered",
"Learning rate of the algorithm"
],
"correctAnswerIndex": 2,
"explanation": "'k' is the number of closest neighbors used to decide the predicted class or value."
},
{
"id": 7,
"questionText": "Which of the following is true about KNN training phase?",
"options": [
"It stores all training data without building a model",
"It builds a model by computing centroids",
"It generates decision boundaries explicitly",
"It calculates feature importance scores"
],
"correctAnswerIndex": 0,
"explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time."
},
{
"id": 8,
"questionText": "Which KNN variant can handle weighted voting?",
"options": [
"Uniform KNN",
"Decision Tree",
"Random Forest",
"Weighted KNN"
],
"correctAnswerIndex": 3,
"explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output."
},
{
"id": 9,
"questionText": "Which of the following affects KNN performance the most?",
"options": [
"Choice of distance metric",
"Activation function",
"Regularization parameter",
"Number of epochs"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical."
},
{
"id": 10,
"questionText": "What is the default distance metric for most KNN implementations?",
"options": [
"Cosine similarity",
"Manhattan distance",
"Hamming distance",
"Euclidean distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean distance is most commonly used by default in KNN implementations."
},
{
"id": 11,
"questionText": "How does KNN handle a new data point for prediction?",
"options": [
"It updates its model parameters",
"It finds k closest points in the training set and predicts based on them",
"It generates random prediction",
"It builds a regression line through neighbors"
],
"correctAnswerIndex": 1,
"explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)."
},
{
"id": 12,
"questionText": "What is the main drawback of KNN on large datasets?",
"options": [
"Does not scale to many classes",
"High training time",
"Cannot handle missing values",
"High prediction time"
],
"correctAnswerIndex": 3,
"explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets."
},
{
"id": 13,
"questionText": "Which of the following is true about KNN and normalization?",
"options": [
"Normalization is not required",
"Normalization only applies to categorical data",
"Normalization changes class labels",
"Normalization improves distance-based predictions"
],
"correctAnswerIndex": 3,
"explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features."
},
{
"id": 14,
"questionText": "How does KNN behave in the presence of irrelevant features?",
"options": [
"Features are automatically ignored",
"Performance improves",
"Performance drops",
"Algorithm ignores them during prediction"
],
"correctAnswerIndex": 2,
"explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy."
},
{
"id": 15,
"questionText": "What type of algorithm is KNN considered in terms of model structure?",
"options": [
"Non-parametric",
"Linear",
"Probabilistic",
"Parametric"
],
"correctAnswerIndex": 0,
"explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs."
},
{
"id": 16,
"questionText": "Which K value is generally recommended to avoid overfitting in KNN?",
"options": [
"k moderate value like sqrt(n)",
"k = 1",
"k very small",
"k equal to dataset size"
],
"correctAnswerIndex": 0,
"explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting."
},
{
"id": 17,
"questionText": "Which metric is suitable for categorical variables in KNN?",
"options": [
"Minkowski distance",
"Manhattan distance",
"Euclidean distance",
"Hamming distance"
],
"correctAnswerIndex": 3,
"explanation": "Hamming distance counts mismatches between categorical feature values."
},
{
"id": 18,
"questionText": "Which of the following is NOT a type of KNN?",
"options": [
"Weighted KNN",
"Regression KNN",
"Decision KNN",
"Classification KNN"
],
"correctAnswerIndex": 2,
"explanation": "There is no 'Decision KNN'; KNN is mainly classification, regression, or weighted variant."
},
{
"id": 19,
"questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?",
"options": [
"Minority class dominates predictions",
"Majority class dominates predictions",
"KNN automatically balances classes",
"Minor impact on accuracy"
],
"correctAnswerIndex": 1,
"explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results."
},
{
"id": 20,
"questionText": "What is the primary storage requirement for KNN?",
"options": [
"Feature coefficients",
"All training data points",
"Decision thresholds",
"Distance matrices precomputed"
],
"correctAnswerIndex": 1,
"explanation": "KNN requires storing all training data for distance comparisons at prediction time."
},
{
"id": 21,
"questionText": "What does the term 'curse of dimensionality' refer to in KNN?",
"options": [
"Overfitting in small datasets",
"High computation time with too many neighbors",
"Distances become less meaningful in high dimensions",
"Underfitting in large datasets"
],
"correctAnswerIndex": 2,
"explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance."
},
{
"id": 22,
"questionText": "Which technique can speed up KNN on large datasets?",
"options": [
"KD-Trees or Ball-Trees",
"Using logistic regression instead",
"Principal Component Analysis",
"Random Forest preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances."
},
{
"id": 23,
"questionText": "In KNN regression, how is the predicted value calculated?",
"options": [
"Using linear regression on neighbors",
"Using gradient descent",
"Majority vote of nearest neighbors",
"Average of nearest neighbors’ values"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values."
},
{
"id": 24,
"questionText": "Which of the following is true about KNN decision boundary?",
"options": [
"Always axis-aligned",
"Always linear",
"Depends on data distribution",
"Always circular"
],
"correctAnswerIndex": 2,
"explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms."
},
{
"id": 25,
"questionText": "Which method can improve KNN on high-dimensional data?",
"options": [
"Increasing k to dataset size",
"Feature selection",
"Ignoring normalization",
"Adding more neighbors"
],
"correctAnswerIndex": 1,
"explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability."
},
{
"id": 26,
"questionText": "KNN cannot handle which of the following natively?",
"options": [
"Large datasets efficiently",
"Numeric features",
"Categorical features",
"Missing data directly"
],
"correctAnswerIndex": 3,
"explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)."
},
{
"id": 27,
"questionText": "How does KNN handle ties in classification voting?",
"options": [
"Chooses randomly among tied classes",
"Fails with an error",
"Chooses the closest neighbor's class",
"Always chooses class 0"
],
"correctAnswerIndex": 2,
"explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes."
},
{
"id": 28,
"questionText": "Which scenario would make KNN less suitable?",
"options": [
"Low-dimensional small datasets",
"High-dimensional large datasets",
"Well-separated clusters",
"Binary classification"
],
"correctAnswerIndex": 1,
"explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy."
},
{
"id": 29,
"questionText": "What is the time complexity of a naive KNN prediction with n training points?",
"options": [
"O(n^2)",
"O(1)",
"O(log n)",
"O(n)"
],
"correctAnswerIndex": 3,
"explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity."
},
{
"id": 30,
"questionText": "What preprocessing step can improve KNN accuracy?",
"options": [
"Adding irrelevant features",
"Removing the dependent variable",
"Randomly shuffling the data",
"Scaling features to similar range"
],
"correctAnswerIndex": 3,
"explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range."
},
{
"id": 31,
"questionText": "What is the effect of increasing 'k' in KNN classification?",
"options": [
"Decreases bias",
"Reduces overfitting",
"Increases sensitivity to noise",
"Increases model variance"
],
"correctAnswerIndex": 1,
"explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias."
},
{
"id": 32,
"questionText": "Which distance metric can be more robust to outliers in KNN?",
"options": [
"Cosine similarity",
"Minkowski distance",
"Manhattan distance",
"Euclidean distance"
],
"correctAnswerIndex": 2,
"explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance."
},
{
"id": 33,
"questionText": "How can KNN be modified for imbalanced datasets?",
"options": [
"Use weighted voting based on distance",
"Increase k to dataset size",
"Normalize features only",
"Remove minority class samples"
],
"correctAnswerIndex": 0,
"explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class."
},
{
"id": 34,
"questionText": "Which method can reduce KNN prediction time for large datasets?",
"options": [
"Dimensionality reduction like PCA",
"Using random shuffling",
"Increasing k",
"Adding more features"
],
"correctAnswerIndex": 0,
"explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation."
},
{
"id": 35,
"questionText": "Why might KNN fail in very high-dimensional spaces?",
"options": [
"Overfitting to majority class",
"Random initialization",
"Learning rate too high",
"Curse of dimensionality"
],
"correctAnswerIndex": 3,
"explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy."
},
{
"id": 36,
"questionText": "What does weighted KNN regression use instead of simple averaging?",
"options": [
"Distance-based weighting of neighbors",
"Median value of neighbors",
"Majority vote of neighbors",
"Random selection of neighbors"
],
"correctAnswerIndex": 0,
"explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value."
},
{
"id": 37,
"questionText": "Which technique is useful to handle categorical and numeric features together in KNN?",
"options": [
"Ignore numeric features",
"Convert categorical to numeric with one-hot encoding",
"Normalize categorical features only",
"Use majority voting only"
],
"correctAnswerIndex": 1,
"explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied."
},
{
"id": 38,
"questionText": "In KNN, what is the effect of noisy features?",
"options": [
"Does not affect performance",
"Automatically removed",
"Reduces accuracy",
"Improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Noisy features distort distance calculations, reducing prediction accuracy."
},
{
"id": 39,
"questionText": "Which of the following can help KNN generalize better?",
"options": [
"Adding more irrelevant features",
"Reducing k to 1",
"Feature scaling and selection",
"Increasing dataset size without preprocessing"
],
"correctAnswerIndex": 2,
"explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization."
},
{
"id": 40,
"questionText": "What happens if k is even and there is a tie in classification?",
"options": [
"Prediction fails with error",
"Tie-breaking strategy is needed",
"Algorithm automatically increments k",
"Randomly ignores the new point"
],
"correctAnswerIndex": 1,
"explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor."
},
{
"id": 41,
"questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?",
"options": [
"L2 normalization",
"Random shuffling",
"Adding more terms",
"Stop-word removal only"
],
"correctAnswerIndex": 0,
"explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN."
},
{
"id": 42,
"questionText": "Which of the following affects KNN accuracy most in practice?",
"options": [
"Learning rate",
"Random seed only",
"Distance metric and k",
"Number of trees"
],
"correctAnswerIndex": 2,
"explanation": "Choice of k and distance metric strongly influence KNN performance."
},
{
"id": 43,
"questionText": "In KNN regression, how can you reduce the impact of outliers?",
"options": [
"Use simple mean without weighting",
"Increase k to dataset size",
"Ignore preprocessing",
"Use weighted averaging based on distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers."
},
{
"id": 44,
"questionText": "Which approach can make KNN faster on large datasets?",
"options": [
"Increase k to max",
"Add random noise to data",
"KD-Tree, Ball-Tree, or approximate nearest neighbor search",
"Use high-dimensional features"
],
"correctAnswerIndex": 2,
"explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction."
},
{
"id": 45,
"questionText": "How does KNN handle multi-class classification?",
"options": [
"By majority vote among neighbors",
"Cannot handle multi-class",
"By training separate binary classifiers",
"Only predicts top two classes"
],
"correctAnswerIndex": 0,
"explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes."
},
{
"id": 46,
"questionText": "Which distance metric is suitable for high-dimensional sparse data?",
"options": [
"Manhattan distance",
"Euclidean distance",
"Cosine similarity",
"Hamming distance"
],
"correctAnswerIndex": 2,
"explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations."
},
{
"id": 47,
"questionText": "What happens to KNN performance if features are not scaled?",
"options": [
"Dominated by features with larger scales",
"Performance improves automatically",
"Distance calculation is unaffected",
"Accuracy remains same always"
],
"correctAnswerIndex": 0,
"explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions."
},
{
"id": 48,
"questionText": "How can KNN be adapted for regression with categorical features?",
"options": [
"Encode categories numerically or use mixed distance metric",
"Use Euclidean distance directly",
"Remove categorical features",
"Only predict the most frequent category"
],
"correctAnswerIndex": 0,
"explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks."
},
{
"id": 49,
"questionText": "What is one common method to select an optimal k?",
"options": [
"Maximizing feature count",
"Using k=1 always",
"Random selection",
"Cross-validation"
],
"correctAnswerIndex": 3,
"explanation": "Cross-validation evaluates different k values to choose the one yielding best performance."
},
{
"id": 50,
"questionText": "Which factor can lead to overfitting in KNN?",
"options": [
"Using fewer neighbors",
"Too small k value",
"Scaling features",
"Using weighted distance"
],
"correctAnswerIndex": 1,
"explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting."
},
{
"id": 51,
"questionText": "In KNN, what is an advantage of using odd k values in binary classification?",
"options": [
"Avoid ties in voting",
"Reduce distance calculations",
"Increase speed",
"Improve scaling automatically"
],
"correctAnswerIndex": 0,
"explanation": "Odd k values help prevent ties between two classes."
},
{
"id": 52,
"questionText": "Which type of feature transformation is recommended for KNN?",
"options": [
"Adding irrelevant features",
"One-hot encoding only for numeric data",
"Normalization or standardization",
"Random shuffling of features"
],
"correctAnswerIndex": 2,
"explanation": "Normalization ensures fair contribution of each feature to distance calculation."
},
{
"id": 53,
"questionText": "Which of the following reduces KNN sensitivity to outliers?",
"options": [
"Increase k to 1",
"Use Euclidean distance only",
"Remove normalization",
"Weighted distance averaging"
],
"correctAnswerIndex": 3,
"explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact."
},
{
"id": 54,
"questionText": "In KNN, what is the effect of adding irrelevant features?",
"options": [
"Automatically removed",
"Decreases accuracy",
"Increases accuracy",
"No effect"
],
"correctAnswerIndex": 1,
"explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy."
},
{
"id": 55,
"questionText": "Which method can improve KNN performance in sparse datasets?",
"options": [
"Ignore distance weighting",
"Add noise to features",
"Dimensionality reduction",
"Increase k to dataset size"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets."
},
{
"id": 56,
"questionText": "Which approach helps handle large-scale KNN efficiently?",
"options": [
"Increasing k arbitrarily",
"Scaling only",
"Approximate nearest neighbor search",
"Random shuffling"
],
"correctAnswerIndex": 2,
"explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors."
},
{
"id": 57,
"questionText": "Which of the following is true for KNN regression prediction?",
"options": [
"Weighted average based on neighbor distance",
"Average of nearest neighbors’ values",
"None of the above",
"Both A and B"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression can use simple or weighted averaging of neighbors’ values."
},
{
"id": 58,
"questionText": "Which is a practical drawback of KNN in real-world systems?",
"options": [
"Requires model training",
"High prediction latency",
"Automatically ignores irrelevant features",
"Cannot handle numeric data"
],
"correctAnswerIndex": 1,
"explanation": "KNN computes distances at prediction time, leading to high latency for large datasets."
},
{
"id": 59,
"questionText": "Which type of scaling preserves relative distances between points for KNN?",
"options": [
"Min-Max scaling",
"Log transformation only",
"Adding random noise",
"Shuffling features"
],
"correctAnswerIndex": 0,
"explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances."
},
{
"id": 60,
"questionText": "Which is a disadvantage of KNN compared to parametric models?",
"options": [
"Requires fixed training",
"Slower predictions for large datasets",
"Cannot model non-linear boundaries",
"Sensitive to overfitting only"
],
"correctAnswerIndex": 1,
"explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models."
},
{
"id": 61,
"questionText": "How can KNN handle multi-label classification?",
"options": [
"Uses separate KNN per label",
"Cannot handle multi-label",
"Predict all labels present in neighbors",
"Only predicts one label"
],
"correctAnswerIndex": 2,
"explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance."
},
{
"id": 62,
"questionText": "Which distance metric can handle mixed numeric and categorical data?",
"options": [
"Gower distance",
"Euclidean distance",
"Cosine similarity",
"Manhattan distance"
],
"correctAnswerIndex": 0,
"explanation": "Gower distance can compute similarity for mixed numeric and categorical features."
},
{
"id": 63,
"questionText": "What is one way to reduce memory usage in KNN for large datasets?",
"options": [
"Use condensed nearest neighbor algorithms",
"Ignore irrelevant features",
"Increase k to dataset size",
"Normalize only"
],
"correctAnswerIndex": 0,
"explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy."
},
{
"id": 64,
"questionText": "Which approach helps improve KNN in imbalanced datasets?",
"options": [
"Increase irrelevant features",
"Use k=1 always",
"Distance-weighted voting",
"Ignore normalization"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class."
},
{
"id": 65,
"questionText": "What is the effect of increasing feature dimensionality in KNN?",
"options": [
"Computation decreases",
"Feature importance is automatically computed",
"Accuracy always improves",
"Distances become less meaningful"
],
"correctAnswerIndex": 3,
"explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness."
},
{
"id": 66,
"questionText": "Which scenario can cause KNN to misclassify a data point?",
"options": [
"Choosing odd k",
"Using weighted voting",
"Nearby points from other class dominate",
"Normalization applied"
],
"correctAnswerIndex": 2,
"explanation": "If neighbors are closer from other classes, KNN may predict incorrectly."
},
{
"id": 67,
"questionText": "Which strategy can improve KNN with very sparse datasets?",
"options": [
"Add random features",
"Ignore distance metric",
"Dimensionality reduction",
"Increase k arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful."
},
{
"id": 68,
"questionText": "What is a good rule of thumb for selecting k?",
"options": [
"k = 1 always",
"k = n/2",
"k = sqrt(n)",
"k = number of features"
],
"correctAnswerIndex": 2,
"explanation": "Using k = sqrt(n) balances bias and variance in most cases."
},
{
"id": 69,
"questionText": "Which technique can speed up KNN predictions in high dimensions?",
"options": [
"Approximate nearest neighbor algorithms",
"Normalize only",
"Random shuffling",
"Increase k to max"
],
"correctAnswerIndex": 0,
"explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy."
},
{
"id": 70,
"questionText": "Which type of data preprocessing improves KNN performance?",
"options": [
"Random shuffling only",
"Ignoring categorical features",
"Adding irrelevant features",
"Feature scaling and selection"
],
"correctAnswerIndex": 3,
"explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions."
},
{
"id": 71,
"questionText": "In a recommendation system using KNN, what could cause poor predictions?",
"options": [
"High number of neighbors",
"Sparse user-item interaction data",
"Low-dimensional features",
"Normalized data"
],
"correctAnswerIndex": 1,
"explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations."
},
{
"id": 72,
"questionText": "Which approach is suitable for reducing KNN latency in a real-time system?",
"options": [
"Randomly select features",
"Increase k to dataset size",
"Normalize data only",
"Approximate nearest neighbor search"
],
"correctAnswerIndex": 3,
"explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss."
},
{
"id": 73,
"questionText": "In high-dimensional gene expression data, KNN performance drops because:",
"options": [
"Normalization causes data loss",
"KNN overfits easily with large k",
"Distances become less informative (curse of dimensionality)",
"Minority classes dominate"
],
"correctAnswerIndex": 2,
"explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy."
},
{
"id": 74,
"questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?",
"options": [
"Predicted class may be inaccurate due to no similar neighbors",
"KNN will automatically ignore the profile",
"Model overfits automatically",
"KNN will generate a new class"
],
"correctAnswerIndex": 0,
"explanation": "If no close neighbors exist, KNN cannot provide reliable predictions."
},
{
"id": 75,
"questionText": "What is the main challenge of KNN when deployed in high-frequency trading?",
"options": [
"Weighted voting is not supported",
"Overfitting to training set",
"Distance metric fails for numeric data",
"High prediction latency due to large datasets"
],
"correctAnswerIndex": 3,
"explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading."
},
{
"id": 76,
"questionText": "Scenario: Two classes are very close in feature space but overlapping. Which KNN behavior is expected?",
"options": [
"KNN ignores overlapping points",
"Higher misclassification rate",
"KNN increases k automatically",
"Predictions are perfect"
],
"correctAnswerIndex": 1,
"explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate."
},
{
"id": 77,
"questionText": "Which method improves KNN performance for very high-dimensional image embeddings?",
"options": [
"Use raw pixel values directly",
"Dimensionality reduction (PCA, t-SNE, or UMAP)",
"Increase k to max",
"Randomly shuffle features"
],
"correctAnswerIndex": 1,
"explanation": "Reducing dimensions retains essential information and makes distances meaningful."
},
{
"id": 78,
"questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?",
"options": [
"KNN cannot detect unseen patterns without similar neighbors",
"Prediction latency decreases",
"KNN automatically adapts",
"Accuracy improves with noise"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect."
},
{
"id": 79,
"questionText": "What is a limitation of KNN in large-scale recommendation systems?",
"options": [
"Cannot handle numeric data",
"Fails on binary features",
"Overfits automatically",
"Memory and computation intensive"
],
"correctAnswerIndex": 3,
"explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive."
},
{
"id": 80,
"questionText": "Which approach is suitable for speeding up KNN with millions of samples?",
"options": [
"Use weighted voting only",
"Increase k to n",
"Use approximate nearest neighbor libraries like FAISS or Annoy",
"Normalize features only"
],
"correctAnswerIndex": 2,
"explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy."
},
{
"id": 81,
"questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?",
"options": [
"Feature dominates distance, biasing prediction",
"Weighted voting fails",
"Prediction latency reduces",
"Feature is ignored automatically"
],
"correctAnswerIndex": 0,
"explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled."
},
{
"id": 82,
"questionText": "What is a strategy to handle missing values in KNN?",
"options": [
"Impute missing values before computing distances",
"Increase k to handle missing",
"Ignore missing values automatically",
"Remove all neighbors with missing values"
],
"correctAnswerIndex": 0,
"explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation."
},
{
"id": 83,
"questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?",
"options": [
"Use raw unscaled features",
"Ignore minority class",
"Weighted voting or synthetic oversampling (SMOTE)",
"Reduce k to 1"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases."
},
{
"id": 84,
"questionText": "Which technique reduces distance computation in high-dimensional KNN?",
"options": [
"Random shuffling",
"Adding irrelevant features",
"Dimensionality reduction",
"Increasing k to n"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance."
},
{
"id": 85,
"questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?",
"options": [
"Prediction unaffected",
"Predicted value may be skewed unless weighted",
"Accuracy improves automatically",
"KNN fails completely"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect."
},
{
"id": 86,
"questionText": "What is a benefit of KD-Tree in KNN?",
"options": [
"Reduces neighbor search complexity in low dimensions",
"Reduces bias of model",
"Automatically scales features",
"Increases training time significantly"
],
"correctAnswerIndex": 0,
"explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions."
},
{
"id": 87,
"questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?",
"options": [
"Outliers are automatically removed",
"Accuracy automatically improves",
"Distance metric ignores temporal order",
"KNN predicts trends perfectly"
],
"correctAnswerIndex": 2,
"explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly."
},
{
"id": 88,
"questionText": "Which scenario illustrates KNN's limitation?",
"options": [
"Balanced, low-dimensional data",
"Normalized dataset",
"A new point far from all existing points",
"Few noise-free features"
],
"correctAnswerIndex": 2,
"explanation": "When a point is far from all neighbors, KNN cannot predict reliably."
},
{
"id": 89,
"questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. Which step is crucial?",
"options": [
"Increase k to dataset size",
"Adding irrelevant terms",
"Ignore vector scaling",
"L2 normalization to make distances comparable"
],
"correctAnswerIndex": 3,
"explanation": "TF-IDF vectors should be normalized to ensure fair distance computation."
},
{
"id": 90,
"questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?",
"options": [
"Use feature engineering to separate clusters",
"Ignore scaling",
"Increase k arbitrarily",
"Remove minority points"
],
"correctAnswerIndex": 0,
"explanation": "Engineering features that better separate classes improves KNN accuracy."
},
{
"id": 91,
"questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?",
"options": [
"Use approximate nearest neighbor search",
"Increase k to dataset size",
"Ignore preprocessing",
"Add random noise"
],
"correctAnswerIndex": 0,
"explanation": "Approximate search reduces computation while keeping predictions close to exact KNN."
},
{
"id": 92,
"questionText": "Scenario: Online KNN requires predictions every second. Challenge?",
"options": [
"Cannot handle numeric data",
"KNN scales features automatically",
"High latency due to full distance computation",
"Overfitting automatically"
],
"correctAnswerIndex": 2,
"explanation": "Real-time prediction is slow because KNN computes distance to all points at query time."
},
{
"id": 93,
"questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?",
"options": [
"Use k=1 always",
"Random shuffling",
"Distance-weighted voting",
"Ignore minority classes"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions."
},
{
"id": 94,
"questionText": "Scenario: A KNN model is deployed for anomaly detection. Limitation?",
"options": [
"Rare anomalies may have no close neighbors",
"Weighted KNN solves all issues",
"Accuracy improves automatically",
"Feature scaling is irrelevant"
],
"correctAnswerIndex": 0,
"explanation": "If anomalies are isolated, KNN cannot detect them due to lack of nearby points."
},
{
"id": 95,
"questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?",
"options": [
"Use raw pixel vectors",
"Use approximate nearest neighbor algorithms like FAISS",
"Increase k arbitrarily",
"Ignore normalization"
],
"correctAnswerIndex": 1,
"explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality."
},
{
"id": 96,
"questionText": "Which scenario can lead to KNN overfitting?",
"options": [
"Very small k and noisy data",
"Large k with clean data",
"Normalized features",
"Weighted voting"
],
"correctAnswerIndex": 0,
"explanation": "Small k may fit noise and outliers, causing overfitting."
},
{
"id": 97,
"questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?",
"options": [
"Increase k arbitrarily",
"Remove scaling",
"Simple mean ignoring distances",
"Weighted averaging by distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence."
},
{
"id": 98,
"questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?",
"options": [
"Distance metric fails",
"High memory usage and computation",
"Overfitting automatically",
"Minority class ignored"
],
"correctAnswerIndex": 1,
"explanation": "Sparse matrices require storing many zeros and computing many distances, which is expensive."
},
{
"id": 99,
"questionText": "Scenario: Real-time KNN requires prediction in milliseconds. Solution?",
"options": [
"Use weighted voting only",
"Use approximate nearest neighbor search",
"Increase k to n",
"Ignore feature scaling"
],
"correctAnswerIndex": 1,
"explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets."
},
{
"id": 100,
"questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?",
"options": [
"Ignore scaling",
"Dimensionality reduction or normalization",
"Add random features",
"Use raw text vectors"
],
"correctAnswerIndex": 1,
"explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful."
}
]
}