{
"title": "Boosting Mastery: 100 MCQs",
"description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Boosting, from basic concepts to advanced applications including AdaBoost, Gradient Boosting, XGBoost, and real-world scenarios.",
"questions": [
{
"id": 1,
"questionText": "What is the main goal of Boosting?",
"options": [
"Reduce dataset size",
"Reduce bias and improve predictive accuracy",
"Reduce variance only",
"Cluster similar instances"
],
"correctAnswerIndex": 1,
"explanation": "Boosting is an ensemble technique that sequentially combines weak learners to reduce bias and improve predictive accuracy by focusing on errors made by previous models."
},
{
"id": 2,
"questionText": "Which characteristic defines a weak learner in Boosting?",
"options": [
"Unsupervised algorithm",
"Perfect prediction capability",
"Slightly better than random guessing",
"High-variance model"
],
"correctAnswerIndex": 2,
"explanation": "A weak learner performs slightly better than random chance. Boosting combines many such weak learners to create a strong model."
},
{
"id": 3,
"questionText": "How does Boosting handle misclassified samples?",
"options": [
"It reduces their weights",
"It removes them from the dataset",
"It ignores misclassified samples",
"It increases their weights for the next learner"
],
"correctAnswerIndex": 3,
"explanation": "Boosting assigns higher weights to misclassified samples so that subsequent models focus on them, improving overall accuracy."
},
{
"id": 4,
"questionText": "Which of the following is a common Boosting algorithm?",
"options": [
"PCA",
"Random Forest",
"K-Means",
"AdaBoost"
],
"correctAnswerIndex": 3,
"explanation": "AdaBoost is one of the earliest and most common Boosting algorithms, combining weak learners sequentially."
},
{
"id": 5,
"questionText": "Boosting is generally considered:",
"options": [
"Unsupervised technique",
"Sequential ensemble method",
"Parallel ensemble method",
"Clustering algorithm"
],
"correctAnswerIndex": 1,
"explanation": "Boosting trains models sequentially, each focusing on the errors of the previous model, unlike Bagging which is parallel."
},
{
"id": 6,
"questionText": "In AdaBoost, what does the weight assigned to each weak learner represent?",
"options": [
"Its contribution to the final model",
"Its bias only",
"Its variance",
"Its training time"
],
"correctAnswerIndex": 0,
"explanation": "Each weak learner is assigned a weight based on its accuracy. More accurate learners have higher influence in the final ensemble."
},
{
"id": 7,
"questionText": "Which error type does Boosting primarily aim to reduce?",
"options": [
"Bias",
"Dataset error",
"Irreducible error",
"Variance"
],
"correctAnswerIndex": 0,
"explanation": "Boosting sequentially trains weak learners to correct previous errors, reducing bias and improving model accuracy."
},
{
"id": 8,
"questionText": "What is the key difference between Bagging and Boosting?",
"options": [
"Boosting always uses deep learners",
"Bagging reduces bias; Boosting reduces variance",
"Bagging trains models independently; Boosting sequentially",
"Bagging requires weighted samples; Boosting does not"
],
"correctAnswerIndex": 2,
"explanation": "Bagging reduces variance by averaging independent models. Boosting reduces bias by sequentially training learners focusing on previous errors."
},
{
"id": 9,
"questionText": "Gradient Boosting differs from AdaBoost because it:",
"options": [
"Is unsupervised",
"Uses parallel trees",
"Optimizes a loss function using gradient descent",
"Ignores misclassified samples"
],
"correctAnswerIndex": 2,
"explanation": "Gradient Boosting sequentially fits models to the residuals of the previous model using gradient descent to optimize a chosen loss function."
},
{
"id": 10,
"questionText": "Which metric can be used to evaluate Boosting performance for classification?",
"options": [
"Accuracy, F1-score, AUC",
"Silhouette Score",
"Mean Squared Error only",
"R-squared"
],
"correctAnswerIndex": 0,
"explanation": "Classification metrics such as Accuracy, F1-score, and AUC are suitable for evaluating Boosting performance on classification tasks."
},
{
"id": 11,
"questionText": "Boosting works best with:",
"options": [
"High bias weak learners",
"Clustering algorithms",
"Unsupervised learners",
"Low bias, low variance models"
],
"correctAnswerIndex": 0,
"explanation": "Boosting combines weak learners that are biased but not too complex, sequentially correcting errors to form a strong model."
},
{
"id": 12,
"questionText": "What is the role of learning rate in Boosting?",
"options": [
"Reduces number of features",
"Controls contribution of each weak learner",
"Controls bootstrap sample size",
"Determines tree depth"
],
"correctAnswerIndex": 1,
"explanation": "Learning rate scales the contribution of each weak learner, allowing fine-tuning of the ensemble to prevent overfitting."
},
{
"id": 13,
"questionText": "Which of these is true about overfitting in Boosting?",
"options": [
"Boosting is only for regression",
"Boosting always underfits",
"Boosting can overfit if too many weak learners are used",
"Boosting never overfits"
],
"correctAnswerIndex": 2,
"explanation": "Using too many learners or too complex learners can lead Boosting to overfit, especially with small datasets."
},
{
"id": 14,
"questionText": "XGBoost differs from standard Gradient Boosting in that it:",
"options": [
"Ignores gradients",
"Is unsupervised",
"Does not use trees",
"Uses regularization and optimized computation"
],
"correctAnswerIndex": 3,
"explanation": "XGBoost adds regularization (L1 and L2) and efficient computational techniques, improving performance and reducing overfitting."
},
{
"id": 15,
"questionText": "What is the main advantage of Boosting over a single model?",
"options": [
"Higher accuracy and lower bias",
"Faster training",
"Simplified model interpretation",
"Reduced number of features"
],
"correctAnswerIndex": 0,
"explanation": "By sequentially correcting errors, Boosting often achieves higher accuracy and reduces bias compared to a single model."
},
{
"id": 16,
"questionText": "How does Boosting handle misclassified instances in regression?",
"options": [
"Ignores residuals",
"Uses majority voting",
"Focuses on residuals for next learner",
"Removes outliers completely"
],
"correctAnswerIndex": 2,
"explanation": "In regression, Boosting fits the next weak learner to the residuals (errors) of the previous learner, reducing bias."
},
{
"id": 17,
"questionText": "Which of the following is NOT a Boosting algorithm?",
"options": [
"Gradient Boosting",
"Random Forest",
"AdaBoost",
"XGBoost"
],
"correctAnswerIndex": 1,
"explanation": "Random Forest is a Bagging-based ensemble method, not Boosting."
},
{
"id": 18,
"questionText": "Boosting is most useful when:",
"options": [
"Weak learners have high bias",
"Clustering is needed",
"Base learners have low variance",
"Dataset is extremely large"
],
"correctAnswerIndex": 0,
"explanation": "Boosting reduces bias by combining weak learners that perform slightly better than chance."
},
{
"id": 19,
"questionText": "Which is true about sequential learning in Boosting?",
"options": [
"Bootstrap samples are ignored",
"Each model depends on previous model’s errors",
"Models are trained independently",
"Training is unsupervised"
],
"correctAnswerIndex": 1,
"explanation": "Boosting trains models sequentially, with each learner focusing on the errors of previous learners to reduce bias."
},
{
"id": 20,
"questionText": "Gradient Boosting can be used with which loss functions?",
"options": [
"Only log-loss",
"Only hinge loss",
"Only squared error",
"Any differentiable loss function"
],
"correctAnswerIndex": 3,
"explanation": "Gradient Boosting is flexible and can optimize any differentiable loss function appropriate for the problem."
},
{
"id": 21,
"questionText": "Boosting can handle overfitting better with:",
"options": [
"Higher learning rate",
"Lower learning rate and early stopping",
"Ignoring residuals",
"More features only"
],
"correctAnswerIndex": 1,
"explanation": "A lower learning rate reduces the contribution of each learner, and early stopping prevents adding too many learners, mitigating overfitting."
},
{
"id": 22,
"questionText": "Which property makes Boosting different from Bagging?",
"options": [
"Bootstrap sampling only",
"Random feature selection",
"Sequential error correction",
"Parallel variance reduction"
],
"correctAnswerIndex": 2,
"explanation": "Boosting sequentially corrects errors, whereas Bagging builds independent models in parallel for variance reduction."
},
{
"id": 23,
"questionText": "AdaBoost works primarily with which type of learners?",
"options": [
"Decision stumps",
"Linear regression",
"Deep neural networks",
"Clustering models"
],
"correctAnswerIndex": 0,
"explanation": "AdaBoost often uses simple learners like decision stumps, combining many to form a strong model."
},
{
"id": 24,
"questionText": "Which is a limitation of Boosting?",
"options": [
"Cannot reduce bias",
"Works only for regression",
"Sensitive to noisy data and outliers",
"Does not improve accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Boosting can overfit if data contains noise or outliers because later learners focus on these problematic points."
},
{
"id": 25,
"questionText": "Boosting is considered a strong learner because it:",
"options": [
"Is a single tree",
"Combines multiple weak learners to reduce bias",
"Reduces dataset size",
"Ignores misclassified instances"
],
"correctAnswerIndex": 1,
"explanation": "By sequentially combining weak learners that correct each other’s errors, Boosting produces a high-accuracy strong model."
},
{
"id": 26,
"questionText": "XGBoost improves Gradient Boosting by:",
"options": [
"Adding regularization and efficient computation",
"Reducing dataset size",
"Using unsupervised trees",
"Ignoring residuals"
],
"correctAnswerIndex": 0,
"explanation": "XGBoost introduces L1/L2 regularization and optimized tree construction, improving generalization and speed."
},
{
"id": 27,
"questionText": "What happens if Boosting is applied with very complex base learners?",
"options": [
"Variance is ignored",
"Overfitting is likely",
"Bias reduces automatically",
"Model becomes linear"
],
"correctAnswerIndex": 1,
"explanation": "Using highly complex base learners in Boosting can lead to overfitting, especially if the number of learners is large."
},
{
"id": 28,
"questionText": "Why is learning rate important in Gradient Boosting?",
"options": [
"It selects features randomly",
"It increases dataset size",
"It prevents bootstrapping",
"It controls the step size in gradient descent"
],
"correctAnswerIndex": 3,
"explanation": "Learning rate scales the contribution of each tree in Gradient Boosting, affecting convergence and overfitting."
},
{
"id": 29,
"questionText": "Which of the following best describes Boosting?",
"options": [
"Clustering algorithm",
"Sequential ensemble focusing on reducing bias",
"Dimensionality reduction technique",
"Parallel ensemble focusing on reducing variance"
],
"correctAnswerIndex": 1,
"explanation": "Boosting sequentially trains models to correct errors, reducing bias and improving performance."
},
{
"id": 30,
"questionText": "Which approach can prevent overfitting in Boosting?",
"options": [
"Removing features randomly",
"Early stopping and shrinkage (low learning rate)",
"Increasing tree depth only",
"Ignoring residuals"
],
"correctAnswerIndex": 1,
"explanation": "Early stopping and low learning rate (shrinkage) prevent overfitting by controlling model complexity and contribution of each learner."
},
{
"id": 31,
"questionText": "In Gradient Boosting, what does a 'residual' represent?",
"options": [
"Tree depth",
"Bootstrap sample size",
"Sum of squared errors",
"Difference between actual and predicted values"
],
"correctAnswerIndex": 3,
"explanation": "Gradient Boosting fits each subsequent learner to the residuals (errors) of the previous model to improve predictions."
},
{
"id": 32,
"questionText": "Which parameter controls the complexity of trees in Gradient Boosting?",
"options": [
"Max depth of trees",
"Learning rate",
"Bootstrap fraction",
"Number of samples"
],
"correctAnswerIndex": 0,
"explanation": "Max depth limits tree complexity, preventing overfitting in Gradient Boosting models."
},
{
"id": 33,
"questionText": "In AdaBoost, increasing the number of weak learners too much may:",
"options": [
"Reduce training time",
"Always improve performance",
"Cause overfitting",
"Reduce bias to zero"
],
"correctAnswerIndex": 2,
"explanation": "Too many learners can overfit to training data, especially if noise exists."
},
{
"id": 34,
"questionText": "Gradient Boosting differs from AdaBoost in that it:",
"options": [
"Uses gradient descent to minimize a loss function",
"Uses parallel training",
"Does not adjust sample weights",
"Only works for classification"
],
"correctAnswerIndex": 0,
"explanation": "Gradient Boosting fits new models to the gradient of the loss function, optimizing model performance iteratively."
},
{
"id": 35,
"questionText": "Which technique helps prevent overfitting in Boosting?",
"options": [
"Increasing tree depth",
"Shrinkage (lower learning rate)",
"Ignoring residuals",
"Using only one tree"
],
"correctAnswerIndex": 1,
"explanation": "Reducing learning rate (shrinkage) controls contribution of each learner, preventing overfitting."
},
{
"id": 36,
"questionText": "Which scenario indicates Boosting might overfit?",
"options": [
"Balanced data with shallow trees",
"Small dataset with low variance models",
"Noisy training data with many iterations",
"Parallel training of learners"
],
"correctAnswerIndex": 2,
"explanation": "Boosting focuses on errors, so noisy data can lead the model to overfit to outliers with too many iterations."
},
{
"id": 37,
"questionText": "What is the purpose of 'early stopping' in Gradient Boosting?",
"options": [
"Reduce tree depth",
"Stop adding trees when validation error stops improving",
"Randomly drop trees",
"Increase learning rate"
],
"correctAnswerIndex": 1,
"explanation": "Early stopping halts model training once validation performance stops improving, avoiding overfitting."
},
{
"id": 38,
"questionText": "Which learning rate is preferable for Gradient Boosting with many trees?",
"options": [
"Low learning rate (0.01–0.1)",
"Learning rate does not matter",
"High learning rate (>0.5)",
"Learning rate = 1 always"
],
"correctAnswerIndex": 0,
"explanation": "A low learning rate ensures stable learning and better generalization when many trees are used."
},
{
"id": 39,
"questionText": "In XGBoost, L1 and L2 regularization are used to:",
"options": [
"Increase tree depth automatically",
"Reduce dataset size",
"Prevent overfitting and improve generalization",
"Increase learning rate"
],
"correctAnswerIndex": 2,
"explanation": "Regularization penalizes complex models, reducing overfitting and improving generalization performance."
},
{
"id": 40,
"questionText": "Which type of base learners are typically used in Boosting?",
"options": [
"Shallow decision trees (stumps)",
"Clustering models",
"Deep neural networks",
"Linear regression only"
],
"correctAnswerIndex": 0,
"explanation": "Boosting usually uses simple base learners like shallow trees to incrementally improve performance."
},
{
"id": 41,
"questionText": "Which metric is commonly used to evaluate Boosting in regression tasks?",
"options": [
"F1-score",
"AUC",
"Mean Squared Error (MSE)",
"Silhouette Score"
],
"correctAnswerIndex": 2,
"explanation": "Regression evaluation typically uses metrics like MSE, RMSE, or MAE."
},
{
"id": 42,
"questionText": "In Gradient Boosting, the number of trees should be:",
"options": [
"Irrelevant",
"Balanced with learning rate for optimal performance",
"Always low",
"As high as possible always"
],
"correctAnswerIndex": 1,
"explanation": "A low learning rate requires more trees; a high learning rate may need fewer trees. Balance is essential."
},
{
"id": 43,
"questionText": "Boosting is particularly effective for:",
"options": [
"High bias models",
"High variance, low bias models",
"Unsupervised learning",
"Dimensionality reduction"
],
"correctAnswerIndex": 0,
"explanation": "Boosting reduces bias by combining weak learners sequentially, improving predictions."
},
{
"id": 44,
"questionText": "Why does Boosting focus on misclassified instances?",
"options": [
"To improve overall model accuracy",
"To reduce training time",
"To ignore noisy data",
"To increase bias"
],
"correctAnswerIndex": 0,
"explanation": "Focusing on difficult samples ensures sequential learners correct mistakes, improving ensemble performance."
},
{
"id": 45,
"questionText": "Which of the following Boosting algorithms is gradient-based?",
"options": [
"Random Forest",
"Bagging",
"Gradient Boosting",
"AdaBoost"
],
"correctAnswerIndex": 2,
"explanation": "Gradient Boosting uses gradients of a loss function to guide sequential learning."
},
{
"id": 46,
"questionText": "Which parameter in Gradient Boosting controls the step size of updates?",
"options": [
"Max depth",
"Number of features",
"Learning rate",
"Subsample fraction"
],
"correctAnswerIndex": 2,
"explanation": "Learning rate scales each learner’s contribution, preventing overfitting and ensuring smooth convergence."
},
{
"id": 47,
"questionText": "Which technique helps reduce variance in Boosting?",
"options": [
"High learning rate",
"Subsampling (stochastic gradient boosting)",
"Increasing tree depth",
"Using all features always"
],
"correctAnswerIndex": 1,
"explanation": "Randomly subsampling data and features adds diversity among trees, reducing variance."
},
{
"id": 48,
"questionText": "Which approach is used in XGBoost to improve computational efficiency?",
"options": [
"Parallel tree construction",
"Reducing dataset size arbitrarily",
"Ignoring residuals",
"Sequential single-thread building"
],
"correctAnswerIndex": 0,
"explanation": "XGBoost optimizes training speed via parallel computation and efficient data structures."
},
{
"id": 49,
"questionText": "In AdaBoost, a weak learner with higher error receives:",
"options": [
"Ignored completely",
"Lower weight in the final model",
"Higher weight",
"Same weight as others"
],
"correctAnswerIndex": 1,
"explanation": "Learners with higher error contribute less to the final prediction; AdaBoost weights are proportional to accuracy."
},
{
"id": 50,
"questionText": "Which method helps prevent Boosting from overfitting on noisy datasets?",
"options": [
"Adding more learners",
"Increasing tree depth",
"Shrinkage (low learning rate) and early stopping",
"Using only one tree"
],
"correctAnswerIndex": 2,
"explanation": "Controlling contribution of each learner and halting training early reduces overfitting on noise."
},
{
"id": 51,
"questionText": "Gradient Boosting can optimize which type of loss functions?",
"options": [
"Only squared error",
"Only absolute error",
"Any differentiable loss function",
"Only cross-entropy"
],
"correctAnswerIndex": 2,
"explanation": "Gradient Boosting is flexible, capable of optimizing any differentiable loss suitable for the task."
},
{
"id": 52,
"questionText": "Which of the following is a practical use of Boosting?",
"options": [
"Dimensionality reduction",
"Image clustering",
"Fraud detection in banking",
"Principal Component Analysis"
],
"correctAnswerIndex": 2,
"explanation": "Boosting excels in classification tasks like fraud detection due to its high accuracy and bias reduction."
},
{
"id": 53,
"questionText": "Which combination prevents overfitting in Gradient Boosting?",
"options": [
"Single learner",
"High learning rate and deep trees",
"Many features only",
"Low learning rate and limited tree depth"
],
"correctAnswerIndex": 3,
"explanation": "Limiting tree complexity and using a lower learning rate ensures better generalization."
},
{
"id": 54,
"questionText": "Why is subsampling used in stochastic Gradient Boosting?",
"options": [
"To increase bias",
"To increase training time",
"To remove features",
"To reduce correlation among trees and variance"
],
"correctAnswerIndex": 3,
"explanation": "Randomly selecting subsets of data adds diversity, reducing variance while maintaining bias reduction."
},
{
"id": 55,
"questionText": "In Boosting, why might small weak learners perform better?",
"options": [
"They ignore residuals",
"They remove features",
"They increase bias drastically",
"They reduce overfitting and allow incremental improvement"
],
"correctAnswerIndex": 3,
"explanation": "Simple learners prevent overfitting and allow sequential models to improve predictions gradually."
},
{
"id": 56,
"questionText": "XGBoost uses which technique for missing values?",
"options": [
"Replace with zeros always",
"Ignore missing values",
"Learn default direction in trees automatically",
"Drop rows with missing data"
],
"correctAnswerIndex": 2,
"explanation": "XGBoost can handle missing values by learning the optimal default direction in the tree splits."
},
{
"id": 57,
"questionText": "Which Boosting variant is particularly fast and scalable?",
"options": [
"Random Forest",
"Bagging",
"XGBoost",
"AdaBoost"
],
"correctAnswerIndex": 2,
"explanation": "XGBoost uses optimized computation, parallelization, and regularization, making it fast and scalable for large datasets."
},
{
"id": 58,
"questionText": "Which technique in Boosting ensures sequential models learn from previous mistakes?",
"options": [
"Feature selection",
"Clustering",
"Parallel averaging",
"Weighted samples or residual fitting"
],
"correctAnswerIndex": 3,
"explanation": "Boosting adjusts weights (classification) or fits residuals (regression) to focus on errors from prior learners."
},
{
"id": 59,
"questionText": "Which factor most affects Boosting performance?",
"options": [
"Bootstrap fraction only",
"Dataset size only",
"Feature normalization only",
"Learning rate, number of trees, and base learner complexity"
],
"correctAnswerIndex": 3,
"explanation": "Performance depends on carefully balancing learning rate, number of learners, and the complexity of base learners."
},
{
"id": 60,
"questionText": "Why is Boosting sensitive to outliers?",
"options": [
"Because data is sampled randomly",
"Because trees ignore residuals",
"Because learning rate is always high",
"Because subsequent learners focus on misclassified points"
],
"correctAnswerIndex": 3,
"explanation": "Boosting emphasizes misclassified points, which can amplify the effect of outliers if not handled properly."
},
{
"id": 61,
"questionText": "Which of these parameters is tuned to avoid overfitting in XGBoost?",
"options": [
"Only learning rate",
"Only bootstrap fraction",
"Max depth, learning rate, number of estimators, and regularization",
"Only max features"
],
"correctAnswerIndex": 2,
"explanation": "Tuning these parameters ensures balanced bias-variance trade-off and prevents overfitting."
},
{
"id": 62,
"questionText": "Which advantage does Gradient Boosting have over AdaBoost?",
"options": [
"Works only with binary classification",
"Flexible loss function optimization",
"Uses decision stumps only",
"Parallel computation only"
],
"correctAnswerIndex": 1,
"explanation": "Gradient Boosting can optimize differentiable loss functions, allowing applications in regression and classification tasks."
},
{
"id": 63,
"questionText": "How does subsample fraction affect stochastic Gradient Boosting?",
"options": [
"Reduces correlation among trees and variance",
"Reduces learning rate automatically",
"Removes trees",
"Increases bias only"
],
"correctAnswerIndex": 0,
"explanation": "Randomly using a subset of data for each tree increases diversity and prevents overfitting."
},
{
"id": 64,
"questionText": "Why might small learning rate with many trees outperform high learning rate?",
"options": [
"More stable learning and reduced overfitting",
"Removes noise automatically",
"Reduces bias drastically",
"Faster training"
],
"correctAnswerIndex": 0,
"explanation": "Lower learning rate ensures gradual learning, allowing better generalization and avoiding overfitting."
},
{
"id": 65,
"questionText": "Which Boosting variant is commonly used for large-scale datasets?",
"options": [
"XGBoost",
"Gradient Descent only",
"AdaBoost",
"Bagging"
],
"correctAnswerIndex": 0,
"explanation": "XGBoost is optimized for speed and scalability, suitable for large datasets."
},
{
"id": 66,
"questionText": "Which scenario may cause Gradient Boosting to underperform?",
"options": [
"Shallow learners only",
"Balanced data with low variance models",
"High noise with extreme outliers",
"Early stopping used"
],
"correctAnswerIndex": 2,
"explanation": "Boosting focuses on misclassified points, so noisy datasets with outliers can mislead sequential learners."
},
{
"id": 67,
"questionText": "What is the effect of high tree depth in Gradient Boosting?",
"options": [
"Removes need for learning rate",
"Reduces bias drastically",
"May increase overfitting",
"Always improves accuracy"
],
"correctAnswerIndex": 2,
"explanation": "Deeper trees capture more details but may overfit the training data, especially in Boosting."
},
{
"id": 68,
"questionText": "What is the main purpose of regularization in XGBoost?",
"options": [
"Add more trees",
"Remove residuals",
"Increase learning rate automatically",
"Reduce overfitting and improve generalization"
],
"correctAnswerIndex": 3,
"explanation": "Regularization penalizes complex models to prevent overfitting and enhance generalization."
},
{
"id": 69,
"questionText": "Which parameter combination is key to tuning Boosting?",
"options": [
"Bootstrap fraction only",
"Random seed only",
"Number of features only",
"Number of trees, learning rate, tree depth"
],
"correctAnswerIndex": 3,
"explanation": "Balancing the number of trees, learning rate, and tree depth is crucial for optimal performance."
},
{
"id": 70,
"questionText": "Which approach increases Boosting model diversity and reduces correlation?",
"options": [
"Ignoring residuals",
"Using single tree",
"Stochastic subsampling of data or features",
"Increasing tree depth"
],
"correctAnswerIndex": 2,
"explanation": "Randomly subsampling rows or features creates diverse learners, improving ensemble robustness."
},
{
"id": 71,
"questionText": "A credit card company wants to detect fraud using Boosting. What should they be careful about?",
"options": [
"Outliers and class imbalance",
"Number of features only",
"Use unsupervised learning",
"Shallow learners only"
],
"correctAnswerIndex": 0,
"explanation": "Fraud datasets are highly imbalanced and contain outliers; Boosting can overfit if these are not handled properly."
},
{
"id": 72,
"questionText": "In a noisy regression dataset, using many deep trees in Gradient Boosting may:",
"options": [
"Reduce bias to zero automatically",
"Always improve predictions",
"Ignore residuals",
"Overfit to noise and reduce generalization"
],
"correctAnswerIndex": 3,
"explanation": "Deep trees capture noise as well as signal, which can lead to overfitting in Boosting."
},
{
"id": 73,
"questionText": "A machine learning engineer wants faster training on a large dataset with Gradient Boosting. What is a good approach?",
"options": [
"Increase tree depth drastically",
"Use very high learning rate",
"Use single tree only",
"Use subsample fraction <1 and parallel processing (XGBoost)"
],
"correctAnswerIndex": 3,
"explanation": "Stochastic subsampling and parallel computation improve speed while maintaining performance."
},
{
"id": 74,
"questionText": "Which scenario might make AdaBoost underperform?",
"options": [
"Balanced and clean data",
"High noise in labels",
"Low variance weak learners",
"Small number of iterations"
],
"correctAnswerIndex": 1,
"explanation": "AdaBoost focuses on misclassified samples, so noisy labels can mislead the learning process."
},
{
"id": 75,
"questionText": "In Gradient Boosting, early stopping is used to:",
"options": [
"Always increase number of trees",
"Increase learning rate automatically",
"Reduce number of features",
"Prevent overfitting by halting when validation error stops improving"
],
"correctAnswerIndex": 3,
"explanation": "Early stopping monitors validation error and halts training when additional trees no longer improve performance."
},
{
"id": 76,
"questionText": "In XGBoost, why is column subsampling useful?",
"options": [
"Increases tree depth automatically",
"Removes residuals",
"Reduces correlation among trees and improves generalization",
"Only affects training speed"
],
"correctAnswerIndex": 2,
"explanation": "Randomly selecting a subset of features for each tree reduces correlation and overfitting."
},
{
"id": 77,
"questionText": "A dataset contains extreme outliers. Which Boosting strategy helps?",
"options": [
"Use robust loss function or limit tree depth",
"Increase learning rate",
"Use many deep trees",
"Ignore the outliers"
],
"correctAnswerIndex": 0,
"explanation": "Robust loss functions and shallow trees prevent Boosting from fitting outliers excessively."
},
{
"id": 78,
"questionText": "Gradient Boosting is being used for house price prediction. Which combination prevents overfitting?",
"options": [
"Shallow trees only with one iteration",
"High learning rate and deep trees",
"Ignore residuals",
"Low learning rate and moderate tree depth"
],
"correctAnswerIndex": 3,
"explanation": "Lower learning rate with appropriately sized trees ensures gradual learning and better generalization."
},
{
"id": 79,
"questionText": "Which is a key advantage of XGBoost over standard Gradient Boosting?",
"options": [
"Works only with small datasets",
"Regularization and efficient computation",
"No need for tuning",
"Always reduces bias to zero"
],
"correctAnswerIndex": 1,
"explanation": "XGBoost adds L1/L2 regularization and computational optimizations, making it faster and less prone to overfitting."
},
{
"id": 80,
"questionText": "Which real-world scenario suits Boosting best?",
"options": [
"Linear regression with few samples",
"Binary classification with imbalanced dataset",
"Dimensionality reduction",
"Unsupervised clustering"
],
"correctAnswerIndex": 1,
"explanation": "Boosting is highly effective for classification problems, especially when the dataset is imbalanced or has complex patterns."
},
{
"id": 81,
"questionText": "Why might using very large trees in Boosting be harmful?",
"options": [
"Reduces computation time",
"Always improves bias",
"Removes residuals automatically",
"Can overfit to noise and increase variance"
],
"correctAnswerIndex": 3,
"explanation": "Complex trees capture noise, causing overfitting and reducing generalization."
},
{
"id": 82,
"questionText": "Which scenario requires tuning the learning rate and number of trees carefully?",
"options": [
"Random sampling",
"Large Gradient Boosting models for structured data",
"Unsupervised PCA",
"Single decision stump for small data"
],
"correctAnswerIndex": 1,
"explanation": "Learning rate and tree number must be balanced for stable learning and prevention of overfitting."
},
{
"id": 83,
"questionText": "A Gradient Boosting model shows high training accuracy but low validation accuracy. What could help?",
"options": [
"Increase learning rate",
"Use fewer trees only",
"Increase tree depth",
"Reduce tree depth and use early stopping"
],
"correctAnswerIndex": 3,
"explanation": "Controlling tree complexity and stopping training early mitigates overfitting."
},
{
"id": 84,
"questionText": "Which feature of Boosting allows it to handle difficult classification tasks?",
"options": [
"Ignoring residuals",
"Parallel averaging of trees",
"Sequential focus on misclassified instances",
"Random feature selection only"
],
"correctAnswerIndex": 2,
"explanation": "Boosting emphasizes errors from previous learners, improving accuracy on difficult cases."
},
{
"id": 85,
"questionText": "How can noisy labels in classification affect Boosting?",
"options": [
"Noise improves accuracy",
"Residuals are unaffected",
"Boosting ignores noisy labels automatically",
"Learners may focus on noise, causing overfitting"
],
"correctAnswerIndex": 3,
"explanation": "Boosting gives higher weights to misclassified points, which can include noisy labels, leading to overfitting."
},
{
"id": 86,
"questionText": "A Gradient Boosting model takes very long to train. Which strategy improves efficiency?",
"options": [
"Increase tree depth",
"Reduce learning rate and use subsampling (stochastic GB)",
"Add more features",
"Use only one tree"
],
"correctAnswerIndex": 1,
"explanation": "Subsampling data or features and using stochastic gradient boosting improves computational efficiency."
},
{
"id": 87,
"questionText": "In XGBoost, regularization parameters control:",
"options": [
"Learning rate only",
"Model complexity and overfitting",
"Subsampling only",
"Tree depth only"
],
"correctAnswerIndex": 1,
"explanation": "L1/L2 regularization penalizes complex models, reducing overfitting."
},
{
"id": 88,
"questionText": "Which technique improves generalization in Boosting?",
"options": [
"Increasing learning rate",
"Adding very deep trees only",
"Ignoring residuals",
"Stochastic sampling of data and features"
],
"correctAnswerIndex": 3,
"explanation": "Randomly sampling rows and features introduces diversity and reduces correlation among learners."
},
{
"id": 89,
"questionText": "Which problem type is Boosting less suited for?",
"options": [
"Extremely noisy datasets",
"Structured regression",
"Fraud detection",
"Binary classification"
],
"correctAnswerIndex": 0,
"explanation": "Boosting may overfit on extremely noisy data because it focuses on correcting previous errors."
},
{
"id": 90,
"questionText": "Why is learning rate critical in Boosting?",
"options": [
"Reduces dataset size",
"Controls incremental contribution of each learner",
"Increases tree depth automatically",
"Selects features randomly"
],
"correctAnswerIndex": 1,
"explanation": "Learning rate scales each learner’s impact, affecting convergence and overfitting."
},
{
"id": 91,
"questionText": "Which parameter combination often requires tuning in real-world Boosting tasks?",
"options": [
"Bootstrap fraction only",
"Number of features only",
"Learning rate, number of trees, tree depth, subsample fraction",
"Random seed only"
],
"correctAnswerIndex": 2,
"explanation": "Balancing these parameters is crucial for model performance and generalization."
},
{
"id": 92,
"questionText": "Boosting can achieve better results than Bagging when:",
"options": [
"Variance is low",
"Data is perfectly clean",
"Only linear models are used",
"Bias is high and sequential error correction is needed"
],
"correctAnswerIndex": 3,
"explanation": "Boosting reduces bias by focusing on errors sequentially, while Bagging primarily reduces variance."
},
{
"id": 93,
"questionText": "A Gradient Boosting model predicts housing prices poorly on unseen data. Likely reason?",
"options": [
"Learning rate too low only",
"Bias too low",
"Training data too small only",
"Overfitting due to deep trees or high learning rate"
],
"correctAnswerIndex": 3,
"explanation": "Overfitting arises when trees are too deep or learning rate too high, harming generalization."
},
{
"id": 94,
"questionText": "Scenario: Boosting model for fraud detection shows high accuracy but low recall. What to improve?",
"options": [
"Use very high learning rate",
"Increase tree depth only",
"Adjust class weights or sampling to focus on minority class",
"Reduce number of trees"
],
"correctAnswerIndex": 2,
"explanation": "Class imbalance can cause Boosting to favor majority class; weighting or sampling helps improve recall."
},
{
"id": 95,
"questionText": "Which real-world application suits XGBoost the most?",
"options": [
"Predicting customer churn",
"Image generation",
"Clustering retail products",
"PCA for dimensionality reduction"
],
"correctAnswerIndex": 0,
"explanation": "XGBoost excels at structured data problems like churn prediction due to high accuracy and handling of complex patterns."
},
{
"id": 96,
"questionText": "Scenario: Boosting is overfitting on noisy data. Recommended fix?",
"options": [
"Increase number of trees only",
"Increase tree depth",
"Reduce learning rate, shallow trees, early stopping",
"Ignore residuals"
],
"correctAnswerIndex": 2,
"explanation": "Controlling model complexity and learning rate helps reduce overfitting on noisy data."
},
{
"id": 97,
"questionText": "Which is a main strength of Boosting over Bagging?",
"options": [
"Always faster",
"Reduces bias via sequential error correction",
"No need to tune parameters",
"Reduces variance only"
],
"correctAnswerIndex": 1,
"explanation": "Boosting sequentially reduces bias by focusing on previous errors, while Bagging mainly reduces variance."
},
{
"id": 98,
"questionText": "Scenario: Using Boosting for medical diagnosis with class imbalance. Best strategy?",
"options": [
"Use class weighting or SMOTE with Boosting",
"Use default parameters only",
"Ignore minority class",
"Reduce number of trees"
],
"correctAnswerIndex": 0,
"explanation": "Balancing classes ensures minority class predictions are accurate in Boosting models."
},
{
"id": 99,
"questionText": "Why does XGBoost often outperform traditional Gradient Boosting?",
"options": [
"Only deeper trees",
"Only more trees",
"Regularization, parallelization, and optimized tree learning",
"Only higher learning rate"
],
"correctAnswerIndex": 2,
"explanation": "XGBoost includes computational optimizations and regularization techniques, improving performance and generalization."
},
{
"id": 100,
"questionText": "Scenario: Boosting for credit risk classification performs poorly. Which strategy helps?",
"options": [
"High learning rate only",
"Increase number of trees only",
"Feature engineering, handling class imbalance, tuning learning rate and tree depth",
"Ignore residuals"
],
"correctAnswerIndex": 2,
"explanation": "Careful feature engineering, class balancing, and parameter tuning are critical for high-performing Boosting models."
}
]
}