{
"title": "K-Nearest Neighbors (KNN) Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions focused entirely on K-Nearest Neighbors (KNN) — covering intuition, distance metrics, hyperparameter tuning, classification & regression behavior, curse of dimensionality, and real-world use cases.",
"questions": [
{
"id": 1,
"questionText": "What is the core principle behind the KNN algorithm?",
"options": [
"It builds a decision tree and splits data recursively.",
"It constructs a probabilistic model using Bayes theorem.",
"It predicts the label based on the majority class of k nearest data points.",
"It reduces dimensionality using PCA."
],
"correctAnswerIndex": 2,
"explanation": "KNN predicts the class based on voting from the k nearest neighbors in the training data."
},
{
"id": 2,
"questionText": "KNN is considered which type of learning algorithm?",
"options": [
"Eager learning",
"Reinforcement learning",
"Unsupervised learning",
"Lazy learning"
],
"correctAnswerIndex": 3,
"explanation": "KNN is a lazy learner because it does not build a model during training; it only stores the data."
},
{
"id": 3,
"questionText": "Which distance metric is most commonly used in KNN?",
"options": [
"Cosine Similarity",
"Manhattan Distance",
"Jaccard Distance",
"Euclidean Distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean Distance (L2 norm) is the most common distance metric used in KNN."
},
{
"id": 4,
"questionText": "KNN is mainly used for:",
"options": [
"Only regression",
"Only clustering",
"Both classification and regression",
"Only classification"
],
"correctAnswerIndex": 2,
"explanation": "KNN can perform both classification (class votes) and regression (mean of k nearest values)."
},
{
"id": 5,
"questionText": "What happens if k is set to a very large value?",
"options": [
"The model becomes faster and more accurate always.",
"The model becomes overly generalized and biased.",
"The model becomes highly sensitive to noise.",
"The model becomes very overfitted."
],
"correctAnswerIndex": 1,
"explanation": "A very large k considers too many neighbors and may smooth out genuine class boundaries, causing high bias."
},
{
"id": 6,
"questionText": "In KNN, what does the parameter 'k' represent?",
"options": [
"Number of features in the dataset",
"Depth of the tree used internally",
"Number of nearest neighbors considered",
"Learning rate of the algorithm"
],
"correctAnswerIndex": 2,
"explanation": "'k' is the number of closest neighbors used to decide the predicted class or value."
},
{
"id": 7,
"questionText": "Which of the following is true about KNN training phase?",
"options": [
"It stores all training data without building a model",
"It builds a model by computing centroids",
"It generates decision boundaries explicitly",
"It calculates feature importance scores"
],
"correctAnswerIndex": 0,
"explanation": "KNN is a lazy learner; during training, it only stores the dataset for use at prediction time."
},
{
"id": 8,
"questionText": "Which KNN variant can handle weighted voting?",
"options": [
"Uniform KNN",
"Decision Tree",
"Random Forest",
"Weighted KNN"
],
"correctAnswerIndex": 3,
"explanation": "Weighted KNN gives closer neighbors higher influence while predicting the output."
},
{
"id": 9,
"questionText": "Which of the following affects KNN performance the most?",
"options": [
"Choice of distance metric",
"Activation function",
"Regularization parameter",
"Number of epochs"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on distance computations; the choice of distance metric (Euclidean, Manhattan, etc.) is critical."
},
{
"id": 10,
"questionText": "What is the default distance metric for most KNN implementations?",
"options": [
"Cosine similarity",
"Manhattan distance",
"Hamming distance",
"Euclidean distance"
],
"correctAnswerIndex": 3,
"explanation": "Euclidean distance is most commonly used by default in KNN implementations."
},
{
"id": 11,
"questionText": "How does KNN handle a new data point for prediction?",
"options": [
"It updates its model parameters",
"It finds k closest points in the training set and predicts based on them",
"It generates random prediction",
"It builds a regression line through neighbors"
],
"correctAnswerIndex": 1,
"explanation": "KNN predicts by looking at the nearest k training points and using majority vote (classification) or average (regression)."
},
{
"id": 12,
"questionText": "What is the main drawback of KNN on large datasets?",
"options": [
"Does not scale to many classes",
"High training time",
"Cannot handle missing values",
"High prediction time"
],
"correctAnswerIndex": 3,
"explanation": "KNN stores all training data, so prediction involves computing distances to all points, which is slow for large datasets."
},
{
"id": 13,
"questionText": "Which of the following is true about KNN and normalization?",
"options": [
"Normalization is not required",
"Normalization only applies to categorical data",
"Normalization changes class labels",
"Normalization improves distance-based predictions"
],
"correctAnswerIndex": 3,
"explanation": "Since KNN uses distances, features with larger scales can dominate. Normalization ensures fair contribution from all features."
},
{
"id": 14,
"questionText": "How does KNN behave in the presence of irrelevant features?",
"options": [
"Features are automatically ignored",
"Performance improves",
"Performance drops",
"Algorithm ignores them during prediction"
],
"correctAnswerIndex": 2,
"explanation": "Irrelevant features can distort distance calculations and reduce KNN prediction accuracy."
},
{
"id": 15,
"questionText": "What type of algorithm is KNN considered in terms of model structure?",
"options": [
"Non-parametric",
"Linear",
"Probabilistic",
"Parametric"
],
"correctAnswerIndex": 0,
"explanation": "KNN is non-parametric because it does not assume a predefined form for the function mapping inputs to outputs."
},
{
"id": 16,
"questionText": "Which K value is generally recommended to avoid overfitting in KNN?",
"options": [
"k moderate value like sqrt(n)",
"k = 1",
"k very small",
"k equal to dataset size"
],
"correctAnswerIndex": 0,
"explanation": "A moderate k like sqrt(n) balances bias and variance, preventing overfitting."
},
{
"id": 17,
"questionText": "Which metric is suitable for categorical variables in KNN?",
"options": [
"Minkowski distance",
"Manhattan distance",
"Euclidean distance",
"Hamming distance"
],
"correctAnswerIndex": 3,
"explanation": "Hamming distance counts mismatches between categorical feature values."
},
{
"id": 18,
"questionText": "Which of the following is NOT a type of KNN?",
"options": [
"Weighted KNN",
"Regression KNN",
"Decision KNN",
"Classification KNN"
],
"correctAnswerIndex": 2,
"explanation": "There is no 'Decision KNN'; KNN is mainly classification, regression, or weighted variant."
},
{
"id": 19,
"questionText": "What is the effect of having two classes with very imbalanced sizes in KNN?",
"options": [
"Minority class dominates predictions",
"Majority class dominates predictions",
"KNN automatically balances classes",
"Minor impact on accuracy"
],
"correctAnswerIndex": 1,
"explanation": "KNN predictions are influenced by majority neighbors; imbalanced classes may bias the results."
},
{
"id": 20,
"questionText": "What is the primary storage requirement for KNN?",
"options": [
"Feature coefficients",
"All training data points",
"Decision thresholds",
"Distance matrices precomputed"
],
"correctAnswerIndex": 1,
"explanation": "KNN requires storing all training data for distance comparisons at prediction time."
},
{
"id": 21,
"questionText": "What does the term 'curse of dimensionality' refer to in KNN?",
"options": [
"Overfitting in small datasets",
"High computation time with too many neighbors",
"Distances become less meaningful in high dimensions",
"Underfitting in large datasets"
],
"correctAnswerIndex": 2,
"explanation": "As dimensions increase, data points become sparse and distance measures lose effectiveness, reducing KNN performance."
},
{
"id": 22,
"questionText": "Which technique can speed up KNN on large datasets?",
"options": [
"KD-Trees or Ball-Trees",
"Using logistic regression instead",
"Principal Component Analysis",
"Random Forest preprocessing"
],
"correctAnswerIndex": 0,
"explanation": "KD-Trees and Ball-Trees organize data to quickly find nearest neighbors without computing all distances."
},
{
"id": 23,
"questionText": "In KNN regression, how is the predicted value calculated?",
"options": [
"Using linear regression on neighbors",
"Using gradient descent",
"Majority vote of nearest neighbors",
"Average of nearest neighbors’ values"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression predicts by taking the mean (or sometimes weighted mean) of the k nearest neighbors' values."
},
{
"id": 24,
"questionText": "Which of the following is true about KNN decision boundary?",
"options": [
"Always axis-aligned",
"Always linear",
"Depends on data distribution",
"Always circular"
],
"correctAnswerIndex": 2,
"explanation": "KNN decision boundaries can be irregular and follow the shape of data; they are not restricted to linear forms."
},
{
"id": 25,
"questionText": "Which method can improve KNN on high-dimensional data?",
"options": [
"Increasing k to dataset size",
"Feature selection",
"Ignoring normalization",
"Adding more neighbors"
],
"correctAnswerIndex": 1,
"explanation": "Selecting relevant features reduces dimensionality, improving distance calculation reliability."
},
{
"id": 26,
"questionText": "KNN cannot handle which of the following natively?",
"options": [
"Large datasets efficiently",
"Numeric features",
"Categorical features",
"Missing data directly"
],
"correctAnswerIndex": 3,
"explanation": "KNN cannot handle missing values without preprocessing (imputation or removal)."
},
{
"id": 27,
"questionText": "How does KNN handle ties in classification voting?",
"options": [
"Chooses randomly among tied classes",
"Fails with an error",
"Chooses the closest neighbor's class",
"Always chooses class 0"
],
"correctAnswerIndex": 2,
"explanation": "Many implementations break ties by selecting the class of the closest neighbor among the tied classes."
},
{
"id": 28,
"questionText": "Which scenario would make KNN less suitable?",
"options": [
"Low-dimensional small datasets",
"High-dimensional large datasets",
"Well-separated clusters",
"Binary classification"
],
"correctAnswerIndex": 1,
"explanation": "In high-dimensional large datasets, KNN is slow and distances lose meaning, reducing accuracy."
},
{
"id": 29,
"questionText": "What is the time complexity of a naive KNN prediction with n training points?",
"options": [
"O(n^2)",
"O(1)",
"O(log n)",
"O(n)"
],
"correctAnswerIndex": 3,
"explanation": "Naive KNN computes distances to all n points for each prediction, giving O(n) complexity."
},
{
"id": 30,
"questionText": "What preprocessing step can improve KNN accuracy?",
"options": [
"Adding irrelevant features",
"Removing the dependent variable",
"Randomly shuffling the data",
"Scaling features to similar range"
],
"correctAnswerIndex": 3,
"explanation": "Scaling features ensures fair distance computation, preventing one feature from dominating due to larger numeric range."
},
{
"id": 31,
"questionText": "What is the effect of increasing 'k' in KNN classification?",
"options": [
"Decreases bias",
"Reduces overfitting",
"Increases sensitivity to noise",
"Increases model variance"
],
"correctAnswerIndex": 1,
"explanation": "A larger k smooths out predictions, reducing overfitting and variance but increasing bias."
},
{
"id": 32,
"questionText": "Which distance metric can be more robust to outliers in KNN?",
"options": [
"Cosine similarity",
"Minkowski distance",
"Manhattan distance",
"Euclidean distance"
],
"correctAnswerIndex": 2,
"explanation": "Manhattan distance is less sensitive to large deviations in individual features than Euclidean distance."
},
{
"id": 33,
"questionText": "How can KNN be modified for imbalanced datasets?",
"options": [
"Use weighted voting based on distance",
"Increase k to dataset size",
"Normalize features only",
"Remove minority class samples"
],
"correctAnswerIndex": 0,
"explanation": "Weighted voting gives closer neighbors more influence, reducing bias toward the majority class."
},
{
"id": 34,
"questionText": "Which method can reduce KNN prediction time for large datasets?",
"options": [
"Dimensionality reduction like PCA",
"Using random shuffling",
"Increasing k",
"Adding more features"
],
"correctAnswerIndex": 0,
"explanation": "Reducing the number of features with PCA lowers dimensionality, which speeds up distance computation."
},
{
"id": 35,
"questionText": "Why might KNN fail in very high-dimensional spaces?",
"options": [
"Overfitting to majority class",
"Random initialization",
"Learning rate too high",
"Curse of dimensionality"
],
"correctAnswerIndex": 3,
"explanation": "In high dimensions, points become equidistant and neighbors are less meaningful, reducing accuracy."
},
{
"id": 36,
"questionText": "What does weighted KNN regression use instead of simple averaging?",
"options": [
"Distance-based weighting of neighbors",
"Median value of neighbors",
"Majority vote of neighbors",
"Random selection of neighbors"
],
"correctAnswerIndex": 0,
"explanation": "Weighted KNN regression assigns higher weights to closer neighbors when computing the predicted value."
},
{
"id": 37,
"questionText": "Which technique is useful to handle categorical and numeric features together in KNN?",
"options": [
"Ignore numeric features",
"Convert categorical to numeric with one-hot encoding",
"Normalize categorical features only",
"Use majority voting only"
],
"correctAnswerIndex": 1,
"explanation": "One-hot encoding transforms categorical features to numeric so that distance metrics can be applied."
},
{
"id": 38,
"questionText": "In KNN, what is the effect of noisy features?",
"options": [
"Does not affect performance",
"Automatically removed",
"Reduces accuracy",
"Improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Noisy features distort distance calculations, reducing prediction accuracy."
},
{
"id": 39,
"questionText": "Which of the following can help KNN generalize better?",
"options": [
"Adding more irrelevant features",
"Reducing k to 1",
"Feature scaling and selection",
"Increasing dataset size without preprocessing"
],
"correctAnswerIndex": 2,
"explanation": "Scaling ensures fair distance comparison, and selecting relevant features removes noise, improving generalization."
},
{
"id": 40,
"questionText": "What happens if k is even and there is a tie in classification?",
"options": [
"Prediction fails with error",
"Tie-breaking strategy is needed",
"Algorithm automatically increments k",
"Randomly ignores the new point"
],
"correctAnswerIndex": 1,
"explanation": "When k is even, ties may occur; most implementations have a tie-breaking rule like choosing the closest neighbor."
},
{
"id": 41,
"questionText": "Which preprocessing step can improve KNN on text data represented by TF-IDF vectors?",
"options": [
"L2 normalization",
"Random shuffling",
"Adding more terms",
"Stop-word removal only"
],
"correctAnswerIndex": 0,
"explanation": "L2 normalization ensures vectors are comparable in distance calculations for KNN."
},
{
"id": 42,
"questionText": "Which of the following affects KNN accuracy most in practice?",
"options": [
"Learning rate",
"Random seed only",
"Distance metric and k",
"Number of trees"
],
"correctAnswerIndex": 2,
"explanation": "Choice of k and distance metric strongly influence KNN performance."
},
{
"id": 43,
"questionText": "In KNN regression, how can you reduce the impact of outliers?",
"options": [
"Use simple mean without weighting",
"Increase k to dataset size",
"Ignore preprocessing",
"Use weighted averaging based on distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighting closer neighbors more heavily reduces the effect of distant outliers."
},
{
"id": 44,
"questionText": "Which approach can make KNN faster on large datasets?",
"options": [
"Increase k to max",
"Add random noise to data",
"KD-Tree, Ball-Tree, or approximate nearest neighbor search",
"Use high-dimensional features"
],
"correctAnswerIndex": 2,
"explanation": "Tree-based or approximate search structures reduce distance computations needed for prediction."
},
{
"id": 45,
"questionText": "How does KNN handle multi-class classification?",
"options": [
"By majority vote among neighbors",
"Cannot handle multi-class",
"By training separate binary classifiers",
"Only predicts top two classes"
],
"correctAnswerIndex": 0,
"explanation": "KNN counts votes among k neighbors for all classes and selects the class with the highest votes."
},
{
"id": 46,
"questionText": "Which distance metric is suitable for high-dimensional sparse data?",
"options": [
"Manhattan distance",
"Euclidean distance",
"Cosine similarity",
"Hamming distance"
],
"correctAnswerIndex": 2,
"explanation": "Cosine similarity works better for high-dimensional sparse vectors like TF-IDF representations."
},
{
"id": 47,
"questionText": "What happens to KNN performance if features are not scaled?",
"options": [
"Dominated by features with larger scales",
"Performance improves automatically",
"Distance calculation is unaffected",
"Accuracy remains same always"
],
"correctAnswerIndex": 0,
"explanation": "Features with larger numeric ranges dominate distance computation, skewing predictions."
},
{
"id": 48,
"questionText": "How can KNN be adapted for regression with categorical features?",
"options": [
"Encode categories numerically or use mixed distance metric",
"Use Euclidean distance directly",
"Remove categorical features",
"Only predict the most frequent category"
],
"correctAnswerIndex": 0,
"explanation": "Encoding categorical features allows KNN to compute distances effectively for regression tasks."
},
{
"id": 49,
"questionText": "What is one common method to select an optimal k?",
"options": [
"Maximizing feature count",
"Using k=1 always",
"Random selection",
"Cross-validation"
],
"correctAnswerIndex": 3,
"explanation": "Cross-validation evaluates different k values to choose the one yielding best performance."
},
{
"id": 50,
"questionText": "Which factor can lead to overfitting in KNN?",
"options": [
"Using fewer neighbors",
"Too small k value",
"Scaling features",
"Using weighted distance"
],
"correctAnswerIndex": 1,
"explanation": "A very small k (like k=1) can fit to noise and outliers, causing overfitting."
},
{
"id": 51,
"questionText": "In KNN, what is an advantage of using odd k values in binary classification?",
"options": [
"Avoid ties in voting",
"Reduce distance calculations",
"Increase speed",
"Improve scaling automatically"
],
"correctAnswerIndex": 0,
"explanation": "Odd k values help prevent ties between two classes."
},
{
"id": 52,
"questionText": "Which type of feature transformation is recommended for KNN?",
"options": [
"Adding irrelevant features",
"One-hot encoding only for numeric data",
"Normalization or standardization",
"Random shuffling of features"
],
"correctAnswerIndex": 2,
"explanation": "Normalization ensures fair contribution of each feature to distance calculation."
},
{
"id": 53,
"questionText": "Which of the following reduces KNN sensitivity to outliers?",
"options": [
"Increase k to 1",
"Use Euclidean distance only",
"Remove normalization",
"Weighted distance averaging"
],
"correctAnswerIndex": 3,
"explanation": "Weighting neighbors based on distance gives closer points more influence, reducing outlier impact."
},
{
"id": 54,
"questionText": "In KNN, what is the effect of adding irrelevant features?",
"options": [
"Automatically removed",
"Decreases accuracy",
"Increases accuracy",
"No effect"
],
"correctAnswerIndex": 1,
"explanation": "Irrelevant features distort distance calculations, reducing prediction accuracy."
},
{
"id": 55,
"questionText": "Which method can improve KNN performance in sparse datasets?",
"options": [
"Ignore distance weighting",
"Add noise to features",
"Dimensionality reduction",
"Increase k to dataset size"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality can make distance computations more meaningful in sparse datasets."
},
{
"id": 56,
"questionText": "Which approach helps handle large-scale KNN efficiently?",
"options": [
"Increasing k arbitrarily",
"Scaling only",
"Approximate nearest neighbor search",
"Random shuffling"
],
"correctAnswerIndex": 2,
"explanation": "Approximate nearest neighbor search reduces computational cost while giving nearly correct neighbors."
},
{
"id": 57,
"questionText": "Which of the following is true for KNN regression prediction?",
"options": [
"Weighted average based on neighbor distance",
"Average of nearest neighbors’ values",
"None of the above",
"Both A and B"
],
"correctAnswerIndex": 3,
"explanation": "KNN regression can use simple or weighted averaging of neighbors’ values."
},
{
"id": 58,
"questionText": "Which is a practical drawback of KNN in real-world systems?",
"options": [
"Requires model training",
"High prediction latency",
"Automatically ignores irrelevant features",
"Cannot handle numeric data"
],
"correctAnswerIndex": 1,
"explanation": "KNN computes distances at prediction time, leading to high latency for large datasets."
},
{
"id": 59,
"questionText": "Which type of scaling preserves relative distances between points for KNN?",
"options": [
"Min-Max scaling",
"Log transformation only",
"Adding random noise",
"Shuffling features"
],
"correctAnswerIndex": 0,
"explanation": "Min-Max or standardization scales features to similar ranges while preserving relative distances."
},
{
"id": 60,
"questionText": "Which is a disadvantage of KNN compared to parametric models?",
"options": [
"Requires fixed training",
"Slower predictions for large datasets",
"Cannot model non-linear boundaries",
"Sensitive to overfitting only"
],
"correctAnswerIndex": 1,
"explanation": "KNN stores all training data and computes distances, making predictions slower than parametric models."
},
{
"id": 61,
"questionText": "How can KNN handle multi-label classification?",
"options": [
"Uses separate KNN per label",
"Cannot handle multi-label",
"Predict all labels present in neighbors",
"Only predicts one label"
],
"correctAnswerIndex": 2,
"explanation": "KNN can aggregate labels from neighbors and predict multiple labels per instance."
},
{
"id": 62,
"questionText": "Which distance metric can handle mixed numeric and categorical data?",
"options": [
"Gower distance",
"Euclidean distance",
"Cosine similarity",
"Manhattan distance"
],
"correctAnswerIndex": 0,
"explanation": "Gower distance can compute similarity for mixed numeric and categorical features."
},
{
"id": 63,
"questionText": "What is one way to reduce memory usage in KNN for large datasets?",
"options": [
"Use condensed nearest neighbor algorithms",
"Ignore irrelevant features",
"Increase k to dataset size",
"Normalize only"
],
"correctAnswerIndex": 0,
"explanation": "Condensed nearest neighbor algorithms reduce stored points while maintaining accuracy."
},
{
"id": 64,
"questionText": "Which approach helps improve KNN in imbalanced datasets?",
"options": [
"Increase irrelevant features",
"Use k=1 always",
"Distance-weighted voting",
"Ignore normalization"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting gives closer points more influence, reducing bias toward majority class."
},
{
"id": 65,
"questionText": "What is the effect of increasing feature dimensionality in KNN?",
"options": [
"Computation decreases",
"Feature importance is automatically computed",
"Accuracy always improves",
"Distances become less meaningful"
],
"correctAnswerIndex": 3,
"explanation": "High-dimensional spaces make points almost equidistant, reducing KNN effectiveness."
},
{
"id": 66,
"questionText": "Which scenario can cause KNN to misclassify a data point?",
"options": [
"Choosing odd k",
"Using weighted voting",
"Nearby points from other class dominate",
"Normalization applied"
],
"correctAnswerIndex": 2,
"explanation": "If neighbors are closer from other classes, KNN may predict incorrectly."
},
{
"id": 67,
"questionText": "Which strategy can improve KNN with very sparse datasets?",
"options": [
"Add random features",
"Ignore distance metric",
"Dimensionality reduction",
"Increase k arbitrarily"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensionality reduces sparsity and makes distances meaningful."
},
{
"id": 68,
"questionText": "What is a good rule of thumb for selecting k?",
"options": [
"k = 1 always",
"k = n/2",
"k = sqrt(n)",
"k = number of features"
],
"correctAnswerIndex": 2,
"explanation": "Using k = sqrt(n) balances bias and variance in most cases."
},
{
"id": 69,
"questionText": "Which technique can speed up KNN predictions in high dimensions?",
"options": [
"Approximate nearest neighbor algorithms",
"Normalize only",
"Random shuffling",
"Increase k to max"
],
"correctAnswerIndex": 0,
"explanation": "Approximate nearest neighbor search reduces computation while maintaining accuracy."
},
{
"id": 70,
"questionText": "Which type of data preprocessing improves KNN performance?",
"options": [
"Random shuffling only",
"Ignoring categorical features",
"Adding irrelevant features",
"Feature scaling and selection"
],
"correctAnswerIndex": 3,
"explanation": "Scaling ensures fair distance measurement, and selecting relevant features removes noise, improving predictions."
},
{
"id": 71,
"questionText": "In a recommendation system using KNN, what could cause poor predictions?",
"options": [
"High number of neighbors",
"Sparse user-item interaction data",
"Low-dimensional features",
"Normalized data"
],
"correctAnswerIndex": 1,
"explanation": "Sparse interaction matrices reduce neighbor similarity reliability, causing poor recommendations."
},
{
"id": 72,
"questionText": "Which approach is suitable for reducing KNN latency in a real-time system?",
"options": [
"Randomly select features",
"Increase k to dataset size",
"Normalize data only",
"Approximate nearest neighbor search"
],
"correctAnswerIndex": 3,
"explanation": "Approximate nearest neighbor algorithms provide fast predictions with minimal accuracy loss."
},
{
"id": 73,
"questionText": "In high-dimensional gene expression data, KNN performance drops because:",
"options": [
"Normalization causes data loss",
"KNN overfits easily with large k",
"Distances become less informative (curse of dimensionality)",
"Minority classes dominate"
],
"correctAnswerIndex": 2,
"explanation": "High-dimensional data makes points nearly equidistant, reducing neighbor relevance and accuracy."
},
{
"id": 74,
"questionText": "Scenario: A new customer profile is very different from existing customers. Which issue might KNN face?",
"options": [
"Predicted class may be inaccurate due to no similar neighbors",
"KNN will automatically ignore the profile",
"Model overfits automatically",
"KNN will generate a new class"
],
"correctAnswerIndex": 0,
"explanation": "If no close neighbors exist, KNN cannot provide reliable predictions."
},
{
"id": 75,
"questionText": "What is the main challenge of KNN when deployed in high-frequency trading?",
"options": [
"Weighted voting is not supported",
"Overfitting to training set",
"Distance metric fails for numeric data",
"High prediction latency due to large datasets"
],
"correctAnswerIndex": 3,
"explanation": "KNN requires computing distances to all stored points, making it too slow for real-time predictions in trading."
},
{
"id": 76,
"questionText": "Scenario: Two classes are very close in feature space but overlapping. Which KNN behavior is expected?",
"options": [
"KNN ignores overlapping points",
"Higher misclassification rate",
"KNN increases k automatically",
"Predictions are perfect"
],
"correctAnswerIndex": 1,
"explanation": "KNN struggles with overlapping classes as neighbors from the wrong class may dominate."
},
{
"id": 77,
"questionText": "Which method improves KNN performance for very high-dimensional image embeddings?",
"options": [
"Use raw pixel values directly",
"Dimensionality reduction (PCA, t-SNE, or UMAP)",
"Increase k to max",
"Randomly shuffle features"
],
"correctAnswerIndex": 1,
"explanation": "Reducing dimensions retains essential information and makes distances meaningful."
},
{
"id": 78,
"questionText": "Scenario: A fraud detection system uses KNN. New types of fraud appear. What is the limitation?",
"options": [
"KNN cannot detect unseen patterns without similar neighbors",
"Prediction latency decreases",
"KNN automatically adapts",
"Accuracy improves with noise"
],
"correctAnswerIndex": 0,
"explanation": "KNN relies on similarity to existing points, so unseen patterns are difficult to detect."
},
{
"id": 79,
"questionText": "What is a limitation of KNN in large-scale recommendation systems?",
"options": [
"Cannot handle numeric data",
"Fails on binary features",
"Overfits automatically",
"Memory and computation intensive"
],
"correctAnswerIndex": 3,
"explanation": "Storing all user-item interactions and computing distances is memory and CPU intensive."
},
{
"id": 80,
"questionText": "Which approach is suitable for speeding up KNN with millions of samples?",
"options": [
"Use weighted voting only",
"Increase k to n",
"Use approximate nearest neighbor libraries like FAISS or Annoy",
"Normalize features only"
],
"correctAnswerIndex": 2,
"explanation": "Approximate search libraries significantly reduce computation while maintaining near-optimal accuracy."
},
{
"id": 81,
"questionText": "Scenario: In KNN, a feature has a huge numeric range. What problem arises?",
"options": [
"Feature dominates distance, biasing prediction",
"Weighted voting fails",
"Prediction latency reduces",
"Feature is ignored automatically"
],
"correctAnswerIndex": 0,
"explanation": "Large-scale features dominate distance computation, skewing predictions unless scaled."
},
{
"id": 82,
"questionText": "What is a strategy to handle missing values in KNN?",
"options": [
"Impute missing values before computing distances",
"Increase k to handle missing",
"Ignore missing values automatically",
"Remove all neighbors with missing values"
],
"correctAnswerIndex": 0,
"explanation": "Missing values should be imputed (mean, median, or mode) to allow proper distance computation."
},
{
"id": 83,
"questionText": "Scenario: In medical diagnosis using KNN, rare disease cases are underrepresented. Which is a solution?",
"options": [
"Use raw unscaled features",
"Ignore minority class",
"Weighted voting or synthetic oversampling (SMOTE)",
"Reduce k to 1"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting or synthetic oversampling addresses imbalance and improves prediction of rare cases."
},
{
"id": 84,
"questionText": "Which technique reduces distance computation in high-dimensional KNN?",
"options": [
"Random shuffling",
"Adding irrelevant features",
"Dimensionality reduction",
"Increasing k to n"
],
"correctAnswerIndex": 2,
"explanation": "Reducing dimensions reduces number of calculations and improves neighbor relevance."
},
{
"id": 85,
"questionText": "Scenario: In KNN regression, a few extreme neighbor values exist. What is the impact?",
"options": [
"Prediction unaffected",
"Predicted value may be skewed unless weighted",
"Accuracy improves automatically",
"KNN fails completely"
],
"correctAnswerIndex": 1,
"explanation": "Outliers can bias the predicted mean; using weighted averaging mitigates this effect."
},
{
"id": 86,
"questionText": "What is a benefit of KD-Tree in KNN?",
"options": [
"Reduces neighbor search complexity in low dimensions",
"Reduces bias of model",
"Automatically scales features",
"Increases training time significantly"
],
"correctAnswerIndex": 0,
"explanation": "KD-Tree allows efficient nearest neighbor search in low to moderate dimensions."
},
{
"id": 87,
"questionText": "Scenario: KNN is applied on time-series data without preprocessing. What is a potential problem?",
"options": [
"Outliers are automatically removed",
"Accuracy automatically improves",
"Distance metric ignores temporal order",
"KNN predicts trends perfectly"
],
"correctAnswerIndex": 2,
"explanation": "KNN does not account for temporal order; raw time-series may not capture pattern similarity properly."
},
{
"id": 88,
"questionText": "Which scenario illustrates KNN's limitation?",
"options": [
"Balanced, low-dimensional data",
"Normalized dataset",
"A new point far from all existing points",
"Few noise-free features"
],
"correctAnswerIndex": 2,
"explanation": "When a point is far from all neighbors, KNN cannot predict reliably."
},
{
"id": 89,
"questionText": "Scenario: KNN used for text document classification with TF-IDF vectors. Which step is crucial?",
"options": [
"Increase k to dataset size",
"Adding irrelevant terms",
"Ignore vector scaling",
"L2 normalization to make distances comparable"
],
"correctAnswerIndex": 3,
"explanation": "TF-IDF vectors should be normalized to ensure fair distance computation."
},
{
"id": 90,
"questionText": "Scenario: KNN struggles with overlapping clusters in feature space. What is a solution?",
"options": [
"Use feature engineering to separate clusters",
"Ignore scaling",
"Increase k arbitrarily",
"Remove minority points"
],
"correctAnswerIndex": 0,
"explanation": "Engineering features that better separate classes improves KNN accuracy."
},
{
"id": 91,
"questionText": "Which approach can improve KNN in very large datasets without losing much accuracy?",
"options": [
"Use approximate nearest neighbor search",
"Increase k to dataset size",
"Ignore preprocessing",
"Add random noise"
],
"correctAnswerIndex": 0,
"explanation": "Approximate search reduces computation while keeping predictions close to exact KNN."
},
{
"id": 92,
"questionText": "Scenario: Online KNN requires predictions every second. Challenge?",
"options": [
"Cannot handle numeric data",
"KNN scales features automatically",
"High latency due to full distance computation",
"Overfitting automatically"
],
"correctAnswerIndex": 2,
"explanation": "Real-time prediction is slow because KNN computes distance to all points at query time."
},
{
"id": 93,
"questionText": "Scenario: Multi-class KNN with imbalanced classes. What can improve fairness?",
"options": [
"Use k=1 always",
"Random shuffling",
"Distance-weighted voting",
"Ignore minority classes"
],
"correctAnswerIndex": 2,
"explanation": "Weighted voting ensures closer neighbors have more influence, improving minority class predictions."
},
{
"id": 94,
"questionText": "Scenario: A KNN model is deployed for anomaly detection. Limitation?",
"options": [
"Rare anomalies may have no close neighbors",
"Weighted KNN solves all issues",
"Accuracy improves automatically",
"Feature scaling is irrelevant"
],
"correctAnswerIndex": 0,
"explanation": "If anomalies are isolated, KNN cannot detect them due to lack of nearby points."
},
{
"id": 95,
"questionText": "Scenario: In high-dimensional image retrieval, KNN prediction is slow. Solution?",
"options": [
"Use raw pixel vectors",
"Use approximate nearest neighbor algorithms like FAISS",
"Increase k arbitrarily",
"Ignore normalization"
],
"correctAnswerIndex": 1,
"explanation": "Approximate algorithms reduce computation significantly while maintaining retrieval quality."
},
{
"id": 96,
"questionText": "Which scenario can lead to KNN overfitting?",
"options": [
"Very small k and noisy data",
"Large k with clean data",
"Normalized features",
"Weighted voting"
],
"correctAnswerIndex": 0,
"explanation": "Small k may fit noise and outliers, causing overfitting."
},
{
"id": 97,
"questionText": "Scenario: KNN regression for house prices with outlier houses. Best approach?",
"options": [
"Increase k arbitrarily",
"Remove scaling",
"Simple mean ignoring distances",
"Weighted averaging by distance"
],
"correctAnswerIndex": 3,
"explanation": "Weighted averaging reduces outlier impact, giving closer neighbors more influence."
},
{
"id": 98,
"questionText": "Scenario: KNN applied to large sparse matrix of user ratings. Challenge?",
"options": [
"Distance metric fails",
"High memory usage and computation",
"Overfitting automatically",
"Minority class ignored"
],
"correctAnswerIndex": 1,
"explanation": "Sparse matrices require storing many zeros and computing many distances, which is expensive."
},
{
"id": 99,
"questionText": "Scenario: Real-time KNN requires prediction in milliseconds. Solution?",
"options": [
"Use weighted voting only",
"Use approximate nearest neighbor search",
"Increase k to n",
"Ignore feature scaling"
],
"correctAnswerIndex": 1,
"explanation": "Approximate methods like Annoy or FAISS significantly speed up prediction for large datasets."
},
{
"id": 100,
"questionText": "Scenario: High-dimensional text KNN classification. Which step is crucial?",
"options": [
"Ignore scaling",
"Dimensionality reduction or normalization",
"Add random features",
"Use raw text vectors"
],
"correctAnswerIndex": 1,
"explanation": "High-dimensional text vectors suffer from the curse of dimensionality; normalization (like L2) or dimensionality reduction is needed to make distances meaningful."
}
]
}