| { | |
| "title": "Boosting Mastery: 100 MCQs", | |
| "description": "A comprehensive set of 100 multiple-choice questions designed to test and deepen your understanding of Boosting, from basic concepts to advanced applications including AdaBoost, Gradient Boosting, XGBoost, and real-world scenarios.", | |
| "questions": [ | |
| { | |
| "id": 1, | |
| "questionText": "What is the main goal of Boosting?", | |
| "options": [ | |
| "Reduce dataset size", | |
| "Reduce bias and improve predictive accuracy", | |
| "Reduce variance only", | |
| "Cluster similar instances" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting is an ensemble technique that sequentially combines weak learners to reduce bias and improve predictive accuracy by focusing on errors made by previous models." | |
| }, | |
| { | |
| "id": 2, | |
| "questionText": "Which characteristic defines a weak learner in Boosting?", | |
| "options": [ | |
| "Unsupervised algorithm", | |
| "Perfect prediction capability", | |
| "Slightly better than random guessing", | |
| "High-variance model" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "A weak learner performs slightly better than random chance. Boosting combines many such weak learners to create a strong model." | |
| }, | |
| { | |
| "id": 3, | |
| "questionText": "How does Boosting handle misclassified samples?", | |
| "options": [ | |
| "It reduces their weights", | |
| "It removes them from the dataset", | |
| "It ignores misclassified samples", | |
| "It increases their weights for the next learner" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Boosting assigns higher weights to misclassified samples so that subsequent models focus on them, improving overall accuracy." | |
| }, | |
| { | |
| "id": 4, | |
| "questionText": "Which of the following is a common Boosting algorithm?", | |
| "options": [ | |
| "PCA", | |
| "Random Forest", | |
| "K-Means", | |
| "AdaBoost" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "AdaBoost is one of the earliest and most common Boosting algorithms, combining weak learners sequentially." | |
| }, | |
| { | |
| "id": 5, | |
| "questionText": "Boosting is generally considered:", | |
| "options": [ | |
| "Unsupervised technique", | |
| "Sequential ensemble method", | |
| "Parallel ensemble method", | |
| "Clustering algorithm" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting trains models sequentially, each focusing on the errors of the previous model, unlike Bagging which is parallel." | |
| }, | |
| { | |
| "id": 6, | |
| "questionText": "In AdaBoost, what does the weight assigned to each weak learner represent?", | |
| "options": [ | |
| "Its contribution to the final model", | |
| "Its bias only", | |
| "Its variance", | |
| "Its training time" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Each weak learner is assigned a weight based on its accuracy. More accurate learners have higher influence in the final ensemble." | |
| }, | |
| { | |
| "id": 7, | |
| "questionText": "Which error type does Boosting primarily aim to reduce?", | |
| "options": [ | |
| "Bias", | |
| "Dataset error", | |
| "Irreducible error", | |
| "Variance" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting sequentially trains weak learners to correct previous errors, reducing bias and improving model accuracy." | |
| }, | |
| { | |
| "id": 8, | |
| "questionText": "What is the key difference between Bagging and Boosting?", | |
| "options": [ | |
| "Boosting always uses deep learners", | |
| "Bagging reduces bias; Boosting reduces variance", | |
| "Bagging trains models independently; Boosting sequentially", | |
| "Bagging requires weighted samples; Boosting does not" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Bagging reduces variance by averaging independent models. Boosting reduces bias by sequentially training learners focusing on previous errors." | |
| }, | |
| { | |
| "id": 9, | |
| "questionText": "Gradient Boosting differs from AdaBoost because it:", | |
| "options": [ | |
| "Is unsupervised", | |
| "Uses parallel trees", | |
| "Optimizes a loss function using gradient descent", | |
| "Ignores misclassified samples" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Gradient Boosting sequentially fits models to the residuals of the previous model using gradient descent to optimize a chosen loss function." | |
| }, | |
| { | |
| "id": 10, | |
| "questionText": "Which metric can be used to evaluate Boosting performance for classification?", | |
| "options": [ | |
| "Accuracy, F1-score, AUC", | |
| "Silhouette Score", | |
| "Mean Squared Error only", | |
| "R-squared" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Classification metrics such as Accuracy, F1-score, and AUC are suitable for evaluating Boosting performance on classification tasks." | |
| }, | |
| { | |
| "id": 11, | |
| "questionText": "Boosting works best with:", | |
| "options": [ | |
| "High bias weak learners", | |
| "Clustering algorithms", | |
| "Unsupervised learners", | |
| "Low bias, low variance models" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting combines weak learners that are biased but not too complex, sequentially correcting errors to form a strong model." | |
| }, | |
| { | |
| "id": 12, | |
| "questionText": "What is the role of learning rate in Boosting?", | |
| "options": [ | |
| "Reduces number of features", | |
| "Controls contribution of each weak learner", | |
| "Controls bootstrap sample size", | |
| "Determines tree depth" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Learning rate scales the contribution of each weak learner, allowing fine-tuning of the ensemble to prevent overfitting." | |
| }, | |
| { | |
| "id": 13, | |
| "questionText": "Which of these is true about overfitting in Boosting?", | |
| "options": [ | |
| "Boosting is only for regression", | |
| "Boosting always underfits", | |
| "Boosting can overfit if too many weak learners are used", | |
| "Boosting never overfits" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Using too many learners or too complex learners can lead Boosting to overfit, especially with small datasets." | |
| }, | |
| { | |
| "id": 14, | |
| "questionText": "XGBoost differs from standard Gradient Boosting in that it:", | |
| "options": [ | |
| "Ignores gradients", | |
| "Is unsupervised", | |
| "Does not use trees", | |
| "Uses regularization and optimized computation" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "XGBoost adds regularization (L1 and L2) and efficient computational techniques, improving performance and reducing overfitting." | |
| }, | |
| { | |
| "id": 15, | |
| "questionText": "What is the main advantage of Boosting over a single model?", | |
| "options": [ | |
| "Higher accuracy and lower bias", | |
| "Faster training", | |
| "Simplified model interpretation", | |
| "Reduced number of features" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "By sequentially correcting errors, Boosting often achieves higher accuracy and reduces bias compared to a single model." | |
| }, | |
| { | |
| "id": 16, | |
| "questionText": "How does Boosting handle misclassified instances in regression?", | |
| "options": [ | |
| "Ignores residuals", | |
| "Uses majority voting", | |
| "Focuses on residuals for next learner", | |
| "Removes outliers completely" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "In regression, Boosting fits the next weak learner to the residuals (errors) of the previous learner, reducing bias." | |
| }, | |
| { | |
| "id": 17, | |
| "questionText": "Which of the following is NOT a Boosting algorithm?", | |
| "options": [ | |
| "Gradient Boosting", | |
| "Random Forest", | |
| "AdaBoost", | |
| "XGBoost" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Random Forest is a Bagging-based ensemble method, not Boosting." | |
| }, | |
| { | |
| "id": 18, | |
| "questionText": "Boosting is most useful when:", | |
| "options": [ | |
| "Weak learners have high bias", | |
| "Clustering is needed", | |
| "Base learners have low variance", | |
| "Dataset is extremely large" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting reduces bias by combining weak learners that perform slightly better than chance." | |
| }, | |
| { | |
| "id": 19, | |
| "questionText": "Which is true about sequential learning in Boosting?", | |
| "options": [ | |
| "Bootstrap samples are ignored", | |
| "Each model depends on previous model’s errors", | |
| "Models are trained independently", | |
| "Training is unsupervised" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting trains models sequentially, with each learner focusing on the errors of previous learners to reduce bias." | |
| }, | |
| { | |
| "id": 20, | |
| "questionText": "Gradient Boosting can be used with which loss functions?", | |
| "options": [ | |
| "Only log-loss", | |
| "Only hinge loss", | |
| "Only squared error", | |
| "Any differentiable loss function" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Gradient Boosting is flexible and can optimize any differentiable loss function appropriate for the problem." | |
| }, | |
| { | |
| "id": 21, | |
| "questionText": "Boosting can handle overfitting better with:", | |
| "options": [ | |
| "Higher learning rate", | |
| "Lower learning rate and early stopping", | |
| "Ignoring residuals", | |
| "More features only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A lower learning rate reduces the contribution of each learner, and early stopping prevents adding too many learners, mitigating overfitting." | |
| }, | |
| { | |
| "id": 22, | |
| "questionText": "Which property makes Boosting different from Bagging?", | |
| "options": [ | |
| "Bootstrap sampling only", | |
| "Random feature selection", | |
| "Sequential error correction", | |
| "Parallel variance reduction" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting sequentially corrects errors, whereas Bagging builds independent models in parallel for variance reduction." | |
| }, | |
| { | |
| "id": 23, | |
| "questionText": "AdaBoost works primarily with which type of learners?", | |
| "options": [ | |
| "Decision stumps", | |
| "Linear regression", | |
| "Deep neural networks", | |
| "Clustering models" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "AdaBoost often uses simple learners like decision stumps, combining many to form a strong model." | |
| }, | |
| { | |
| "id": 24, | |
| "questionText": "Which is a limitation of Boosting?", | |
| "options": [ | |
| "Cannot reduce bias", | |
| "Works only for regression", | |
| "Sensitive to noisy data and outliers", | |
| "Does not improve accuracy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting can overfit if data contains noise or outliers because later learners focus on these problematic points." | |
| }, | |
| { | |
| "id": 25, | |
| "questionText": "Boosting is considered a strong learner because it:", | |
| "options": [ | |
| "Is a single tree", | |
| "Combines multiple weak learners to reduce bias", | |
| "Reduces dataset size", | |
| "Ignores misclassified instances" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "By sequentially combining weak learners that correct each other’s errors, Boosting produces a high-accuracy strong model." | |
| }, | |
| { | |
| "id": 26, | |
| "questionText": "XGBoost improves Gradient Boosting by:", | |
| "options": [ | |
| "Adding regularization and efficient computation", | |
| "Reducing dataset size", | |
| "Using unsupervised trees", | |
| "Ignoring residuals" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "XGBoost introduces L1/L2 regularization and optimized tree construction, improving generalization and speed." | |
| }, | |
| { | |
| "id": 27, | |
| "questionText": "What happens if Boosting is applied with very complex base learners?", | |
| "options": [ | |
| "Variance is ignored", | |
| "Overfitting is likely", | |
| "Bias reduces automatically", | |
| "Model becomes linear" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Using highly complex base learners in Boosting can lead to overfitting, especially if the number of learners is large." | |
| }, | |
| { | |
| "id": 28, | |
| "questionText": "Why is learning rate important in Gradient Boosting?", | |
| "options": [ | |
| "It selects features randomly", | |
| "It increases dataset size", | |
| "It prevents bootstrapping", | |
| "It controls the step size in gradient descent" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Learning rate scales the contribution of each tree in Gradient Boosting, affecting convergence and overfitting." | |
| }, | |
| { | |
| "id": 29, | |
| "questionText": "Which of the following best describes Boosting?", | |
| "options": [ | |
| "Clustering algorithm", | |
| "Sequential ensemble focusing on reducing bias", | |
| "Dimensionality reduction technique", | |
| "Parallel ensemble focusing on reducing variance" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting sequentially trains models to correct errors, reducing bias and improving performance." | |
| }, | |
| { | |
| "id": 30, | |
| "questionText": "Which approach can prevent overfitting in Boosting?", | |
| "options": [ | |
| "Removing features randomly", | |
| "Early stopping and shrinkage (low learning rate)", | |
| "Increasing tree depth only", | |
| "Ignoring residuals" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Early stopping and low learning rate (shrinkage) prevent overfitting by controlling model complexity and contribution of each learner." | |
| }, | |
| { | |
| "id": 31, | |
| "questionText": "In Gradient Boosting, what does a 'residual' represent?", | |
| "options": [ | |
| "Tree depth", | |
| "Bootstrap sample size", | |
| "Sum of squared errors", | |
| "Difference between actual and predicted values" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Gradient Boosting fits each subsequent learner to the residuals (errors) of the previous model to improve predictions." | |
| }, | |
| { | |
| "id": 32, | |
| "questionText": "Which parameter controls the complexity of trees in Gradient Boosting?", | |
| "options": [ | |
| "Max depth of trees", | |
| "Learning rate", | |
| "Bootstrap fraction", | |
| "Number of samples" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Max depth limits tree complexity, preventing overfitting in Gradient Boosting models." | |
| }, | |
| { | |
| "id": 33, | |
| "questionText": "In AdaBoost, increasing the number of weak learners too much may:", | |
| "options": [ | |
| "Reduce training time", | |
| "Always improve performance", | |
| "Cause overfitting", | |
| "Reduce bias to zero" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Too many learners can overfit to training data, especially if noise exists." | |
| }, | |
| { | |
| "id": 34, | |
| "questionText": "Gradient Boosting differs from AdaBoost in that it:", | |
| "options": [ | |
| "Uses gradient descent to minimize a loss function", | |
| "Uses parallel training", | |
| "Does not adjust sample weights", | |
| "Only works for classification" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Gradient Boosting fits new models to the gradient of the loss function, optimizing model performance iteratively." | |
| }, | |
| { | |
| "id": 35, | |
| "questionText": "Which technique helps prevent overfitting in Boosting?", | |
| "options": [ | |
| "Increasing tree depth", | |
| "Shrinkage (lower learning rate)", | |
| "Ignoring residuals", | |
| "Using only one tree" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Reducing learning rate (shrinkage) controls contribution of each learner, preventing overfitting." | |
| }, | |
| { | |
| "id": 36, | |
| "questionText": "Which scenario indicates Boosting might overfit?", | |
| "options": [ | |
| "Balanced data with shallow trees", | |
| "Small dataset with low variance models", | |
| "Noisy training data with many iterations", | |
| "Parallel training of learners" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting focuses on errors, so noisy data can lead the model to overfit to outliers with too many iterations." | |
| }, | |
| { | |
| "id": 37, | |
| "questionText": "What is the purpose of 'early stopping' in Gradient Boosting?", | |
| "options": [ | |
| "Reduce tree depth", | |
| "Stop adding trees when validation error stops improving", | |
| "Randomly drop trees", | |
| "Increase learning rate" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Early stopping halts model training once validation performance stops improving, avoiding overfitting." | |
| }, | |
| { | |
| "id": 38, | |
| "questionText": "Which learning rate is preferable for Gradient Boosting with many trees?", | |
| "options": [ | |
| "Low learning rate (0.01–0.1)", | |
| "Learning rate does not matter", | |
| "High learning rate (>0.5)", | |
| "Learning rate = 1 always" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "A low learning rate ensures stable learning and better generalization when many trees are used." | |
| }, | |
| { | |
| "id": 39, | |
| "questionText": "In XGBoost, L1 and L2 regularization are used to:", | |
| "options": [ | |
| "Increase tree depth automatically", | |
| "Reduce dataset size", | |
| "Prevent overfitting and improve generalization", | |
| "Increase learning rate" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Regularization penalizes complex models, reducing overfitting and improving generalization performance." | |
| }, | |
| { | |
| "id": 40, | |
| "questionText": "Which type of base learners are typically used in Boosting?", | |
| "options": [ | |
| "Shallow decision trees (stumps)", | |
| "Clustering models", | |
| "Deep neural networks", | |
| "Linear regression only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting usually uses simple base learners like shallow trees to incrementally improve performance." | |
| }, | |
| { | |
| "id": 41, | |
| "questionText": "Which metric is commonly used to evaluate Boosting in regression tasks?", | |
| "options": [ | |
| "F1-score", | |
| "AUC", | |
| "Mean Squared Error (MSE)", | |
| "Silhouette Score" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Regression evaluation typically uses metrics like MSE, RMSE, or MAE." | |
| }, | |
| { | |
| "id": 42, | |
| "questionText": "In Gradient Boosting, the number of trees should be:", | |
| "options": [ | |
| "Irrelevant", | |
| "Balanced with learning rate for optimal performance", | |
| "Always low", | |
| "As high as possible always" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "A low learning rate requires more trees; a high learning rate may need fewer trees. Balance is essential." | |
| }, | |
| { | |
| "id": 43, | |
| "questionText": "Boosting is particularly effective for:", | |
| "options": [ | |
| "High bias models", | |
| "High variance, low bias models", | |
| "Unsupervised learning", | |
| "Dimensionality reduction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting reduces bias by combining weak learners sequentially, improving predictions." | |
| }, | |
| { | |
| "id": 44, | |
| "questionText": "Why does Boosting focus on misclassified instances?", | |
| "options": [ | |
| "To improve overall model accuracy", | |
| "To reduce training time", | |
| "To ignore noisy data", | |
| "To increase bias" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Focusing on difficult samples ensures sequential learners correct mistakes, improving ensemble performance." | |
| }, | |
| { | |
| "id": 45, | |
| "questionText": "Which of the following Boosting algorithms is gradient-based?", | |
| "options": [ | |
| "Random Forest", | |
| "Bagging", | |
| "Gradient Boosting", | |
| "AdaBoost" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Gradient Boosting uses gradients of a loss function to guide sequential learning." | |
| }, | |
| { | |
| "id": 46, | |
| "questionText": "Which parameter in Gradient Boosting controls the step size of updates?", | |
| "options": [ | |
| "Max depth", | |
| "Number of features", | |
| "Learning rate", | |
| "Subsample fraction" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Learning rate scales each learner’s contribution, preventing overfitting and ensuring smooth convergence." | |
| }, | |
| { | |
| "id": 47, | |
| "questionText": "Which technique helps reduce variance in Boosting?", | |
| "options": [ | |
| "High learning rate", | |
| "Subsampling (stochastic gradient boosting)", | |
| "Increasing tree depth", | |
| "Using all features always" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Randomly subsampling data and features adds diversity among trees, reducing variance." | |
| }, | |
| { | |
| "id": 48, | |
| "questionText": "Which approach is used in XGBoost to improve computational efficiency?", | |
| "options": [ | |
| "Parallel tree construction", | |
| "Reducing dataset size arbitrarily", | |
| "Ignoring residuals", | |
| "Sequential single-thread building" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "XGBoost optimizes training speed via parallel computation and efficient data structures." | |
| }, | |
| { | |
| "id": 49, | |
| "questionText": "In AdaBoost, a weak learner with higher error receives:", | |
| "options": [ | |
| "Ignored completely", | |
| "Lower weight in the final model", | |
| "Higher weight", | |
| "Same weight as others" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Learners with higher error contribute less to the final prediction; AdaBoost weights are proportional to accuracy." | |
| }, | |
| { | |
| "id": 50, | |
| "questionText": "Which method helps prevent Boosting from overfitting on noisy datasets?", | |
| "options": [ | |
| "Adding more learners", | |
| "Increasing tree depth", | |
| "Shrinkage (low learning rate) and early stopping", | |
| "Using only one tree" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Controlling contribution of each learner and halting training early reduces overfitting on noise." | |
| }, | |
| { | |
| "id": 51, | |
| "questionText": "Gradient Boosting can optimize which type of loss functions?", | |
| "options": [ | |
| "Only squared error", | |
| "Only absolute error", | |
| "Any differentiable loss function", | |
| "Only cross-entropy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Gradient Boosting is flexible, capable of optimizing any differentiable loss suitable for the task." | |
| }, | |
| { | |
| "id": 52, | |
| "questionText": "Which of the following is a practical use of Boosting?", | |
| "options": [ | |
| "Dimensionality reduction", | |
| "Image clustering", | |
| "Fraud detection in banking", | |
| "Principal Component Analysis" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting excels in classification tasks like fraud detection due to its high accuracy and bias reduction." | |
| }, | |
| { | |
| "id": 53, | |
| "questionText": "Which combination prevents overfitting in Gradient Boosting?", | |
| "options": [ | |
| "Single learner", | |
| "High learning rate and deep trees", | |
| "Many features only", | |
| "Low learning rate and limited tree depth" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Limiting tree complexity and using a lower learning rate ensures better generalization." | |
| }, | |
| { | |
| "id": 54, | |
| "questionText": "Why is subsampling used in stochastic Gradient Boosting?", | |
| "options": [ | |
| "To increase bias", | |
| "To increase training time", | |
| "To remove features", | |
| "To reduce correlation among trees and variance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Randomly selecting subsets of data adds diversity, reducing variance while maintaining bias reduction." | |
| }, | |
| { | |
| "id": 55, | |
| "questionText": "In Boosting, why might small weak learners perform better?", | |
| "options": [ | |
| "They ignore residuals", | |
| "They remove features", | |
| "They increase bias drastically", | |
| "They reduce overfitting and allow incremental improvement" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Simple learners prevent overfitting and allow sequential models to improve predictions gradually." | |
| }, | |
| { | |
| "id": 56, | |
| "questionText": "XGBoost uses which technique for missing values?", | |
| "options": [ | |
| "Replace with zeros always", | |
| "Ignore missing values", | |
| "Learn default direction in trees automatically", | |
| "Drop rows with missing data" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "XGBoost can handle missing values by learning the optimal default direction in the tree splits." | |
| }, | |
| { | |
| "id": 57, | |
| "questionText": "Which Boosting variant is particularly fast and scalable?", | |
| "options": [ | |
| "Random Forest", | |
| "Bagging", | |
| "XGBoost", | |
| "AdaBoost" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "XGBoost uses optimized computation, parallelization, and regularization, making it fast and scalable for large datasets." | |
| }, | |
| { | |
| "id": 58, | |
| "questionText": "Which technique in Boosting ensures sequential models learn from previous mistakes?", | |
| "options": [ | |
| "Feature selection", | |
| "Clustering", | |
| "Parallel averaging", | |
| "Weighted samples or residual fitting" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Boosting adjusts weights (classification) or fits residuals (regression) to focus on errors from prior learners." | |
| }, | |
| { | |
| "id": 59, | |
| "questionText": "Which factor most affects Boosting performance?", | |
| "options": [ | |
| "Bootstrap fraction only", | |
| "Dataset size only", | |
| "Feature normalization only", | |
| "Learning rate, number of trees, and base learner complexity" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Performance depends on carefully balancing learning rate, number of learners, and the complexity of base learners." | |
| }, | |
| { | |
| "id": 60, | |
| "questionText": "Why is Boosting sensitive to outliers?", | |
| "options": [ | |
| "Because data is sampled randomly", | |
| "Because trees ignore residuals", | |
| "Because learning rate is always high", | |
| "Because subsequent learners focus on misclassified points" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Boosting emphasizes misclassified points, which can amplify the effect of outliers if not handled properly." | |
| }, | |
| { | |
| "id": 61, | |
| "questionText": "Which of these parameters is tuned to avoid overfitting in XGBoost?", | |
| "options": [ | |
| "Only learning rate", | |
| "Only bootstrap fraction", | |
| "Max depth, learning rate, number of estimators, and regularization", | |
| "Only max features" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Tuning these parameters ensures balanced bias-variance trade-off and prevents overfitting." | |
| }, | |
| { | |
| "id": 62, | |
| "questionText": "Which advantage does Gradient Boosting have over AdaBoost?", | |
| "options": [ | |
| "Works only with binary classification", | |
| "Flexible loss function optimization", | |
| "Uses decision stumps only", | |
| "Parallel computation only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Gradient Boosting can optimize differentiable loss functions, allowing applications in regression and classification tasks." | |
| }, | |
| { | |
| "id": 63, | |
| "questionText": "How does subsample fraction affect stochastic Gradient Boosting?", | |
| "options": [ | |
| "Reduces correlation among trees and variance", | |
| "Reduces learning rate automatically", | |
| "Removes trees", | |
| "Increases bias only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Randomly using a subset of data for each tree increases diversity and prevents overfitting." | |
| }, | |
| { | |
| "id": 64, | |
| "questionText": "Why might small learning rate with many trees outperform high learning rate?", | |
| "options": [ | |
| "More stable learning and reduced overfitting", | |
| "Removes noise automatically", | |
| "Reduces bias drastically", | |
| "Faster training" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Lower learning rate ensures gradual learning, allowing better generalization and avoiding overfitting." | |
| }, | |
| { | |
| "id": 65, | |
| "questionText": "Which Boosting variant is commonly used for large-scale datasets?", | |
| "options": [ | |
| "XGBoost", | |
| "Gradient Descent only", | |
| "AdaBoost", | |
| "Bagging" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "XGBoost is optimized for speed and scalability, suitable for large datasets." | |
| }, | |
| { | |
| "id": 66, | |
| "questionText": "Which scenario may cause Gradient Boosting to underperform?", | |
| "options": [ | |
| "Shallow learners only", | |
| "Balanced data with low variance models", | |
| "High noise with extreme outliers", | |
| "Early stopping used" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting focuses on misclassified points, so noisy datasets with outliers can mislead sequential learners." | |
| }, | |
| { | |
| "id": 67, | |
| "questionText": "What is the effect of high tree depth in Gradient Boosting?", | |
| "options": [ | |
| "Removes need for learning rate", | |
| "Reduces bias drastically", | |
| "May increase overfitting", | |
| "Always improves accuracy" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Deeper trees capture more details but may overfit the training data, especially in Boosting." | |
| }, | |
| { | |
| "id": 68, | |
| "questionText": "What is the main purpose of regularization in XGBoost?", | |
| "options": [ | |
| "Add more trees", | |
| "Remove residuals", | |
| "Increase learning rate automatically", | |
| "Reduce overfitting and improve generalization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Regularization penalizes complex models to prevent overfitting and enhance generalization." | |
| }, | |
| { | |
| "id": 69, | |
| "questionText": "Which parameter combination is key to tuning Boosting?", | |
| "options": [ | |
| "Bootstrap fraction only", | |
| "Random seed only", | |
| "Number of features only", | |
| "Number of trees, learning rate, tree depth" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Balancing the number of trees, learning rate, and tree depth is crucial for optimal performance." | |
| }, | |
| { | |
| "id": 70, | |
| "questionText": "Which approach increases Boosting model diversity and reduces correlation?", | |
| "options": [ | |
| "Ignoring residuals", | |
| "Using single tree", | |
| "Stochastic subsampling of data or features", | |
| "Increasing tree depth" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Randomly subsampling rows or features creates diverse learners, improving ensemble robustness." | |
| }, | |
| { | |
| "id": 71, | |
| "questionText": "A credit card company wants to detect fraud using Boosting. What should they be careful about?", | |
| "options": [ | |
| "Outliers and class imbalance", | |
| "Number of features only", | |
| "Use unsupervised learning", | |
| "Shallow learners only" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Fraud datasets are highly imbalanced and contain outliers; Boosting can overfit if these are not handled properly." | |
| }, | |
| { | |
| "id": 72, | |
| "questionText": "In a noisy regression dataset, using many deep trees in Gradient Boosting may:", | |
| "options": [ | |
| "Reduce bias to zero automatically", | |
| "Always improve predictions", | |
| "Ignore residuals", | |
| "Overfit to noise and reduce generalization" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Deep trees capture noise as well as signal, which can lead to overfitting in Boosting." | |
| }, | |
| { | |
| "id": 73, | |
| "questionText": "A machine learning engineer wants faster training on a large dataset with Gradient Boosting. What is a good approach?", | |
| "options": [ | |
| "Increase tree depth drastically", | |
| "Use very high learning rate", | |
| "Use single tree only", | |
| "Use subsample fraction <1 and parallel processing (XGBoost)" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Stochastic subsampling and parallel computation improve speed while maintaining performance." | |
| }, | |
| { | |
| "id": 74, | |
| "questionText": "Which scenario might make AdaBoost underperform?", | |
| "options": [ | |
| "Balanced and clean data", | |
| "High noise in labels", | |
| "Low variance weak learners", | |
| "Small number of iterations" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "AdaBoost focuses on misclassified samples, so noisy labels can mislead the learning process." | |
| }, | |
| { | |
| "id": 75, | |
| "questionText": "In Gradient Boosting, early stopping is used to:", | |
| "options": [ | |
| "Always increase number of trees", | |
| "Increase learning rate automatically", | |
| "Reduce number of features", | |
| "Prevent overfitting by halting when validation error stops improving" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Early stopping monitors validation error and halts training when additional trees no longer improve performance." | |
| }, | |
| { | |
| "id": 76, | |
| "questionText": "In XGBoost, why is column subsampling useful?", | |
| "options": [ | |
| "Increases tree depth automatically", | |
| "Removes residuals", | |
| "Reduces correlation among trees and improves generalization", | |
| "Only affects training speed" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Randomly selecting a subset of features for each tree reduces correlation and overfitting." | |
| }, | |
| { | |
| "id": 77, | |
| "questionText": "A dataset contains extreme outliers. Which Boosting strategy helps?", | |
| "options": [ | |
| "Use robust loss function or limit tree depth", | |
| "Increase learning rate", | |
| "Use many deep trees", | |
| "Ignore the outliers" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Robust loss functions and shallow trees prevent Boosting from fitting outliers excessively." | |
| }, | |
| { | |
| "id": 78, | |
| "questionText": "Gradient Boosting is being used for house price prediction. Which combination prevents overfitting?", | |
| "options": [ | |
| "Shallow trees only with one iteration", | |
| "High learning rate and deep trees", | |
| "Ignore residuals", | |
| "Low learning rate and moderate tree depth" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Lower learning rate with appropriately sized trees ensures gradual learning and better generalization." | |
| }, | |
| { | |
| "id": 79, | |
| "questionText": "Which is a key advantage of XGBoost over standard Gradient Boosting?", | |
| "options": [ | |
| "Works only with small datasets", | |
| "Regularization and efficient computation", | |
| "No need for tuning", | |
| "Always reduces bias to zero" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "XGBoost adds L1/L2 regularization and computational optimizations, making it faster and less prone to overfitting." | |
| }, | |
| { | |
| "id": 80, | |
| "questionText": "Which real-world scenario suits Boosting best?", | |
| "options": [ | |
| "Linear regression with few samples", | |
| "Binary classification with imbalanced dataset", | |
| "Dimensionality reduction", | |
| "Unsupervised clustering" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting is highly effective for classification problems, especially when the dataset is imbalanced or has complex patterns." | |
| }, | |
| { | |
| "id": 81, | |
| "questionText": "Why might using very large trees in Boosting be harmful?", | |
| "options": [ | |
| "Reduces computation time", | |
| "Always improves bias", | |
| "Removes residuals automatically", | |
| "Can overfit to noise and increase variance" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Complex trees capture noise, causing overfitting and reducing generalization." | |
| }, | |
| { | |
| "id": 82, | |
| "questionText": "Which scenario requires tuning the learning rate and number of trees carefully?", | |
| "options": [ | |
| "Random sampling", | |
| "Large Gradient Boosting models for structured data", | |
| "Unsupervised PCA", | |
| "Single decision stump for small data" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Learning rate and tree number must be balanced for stable learning and prevention of overfitting." | |
| }, | |
| { | |
| "id": 83, | |
| "questionText": "A Gradient Boosting model shows high training accuracy but low validation accuracy. What could help?", | |
| "options": [ | |
| "Increase learning rate", | |
| "Use fewer trees only", | |
| "Increase tree depth", | |
| "Reduce tree depth and use early stopping" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Controlling tree complexity and stopping training early mitigates overfitting." | |
| }, | |
| { | |
| "id": 84, | |
| "questionText": "Which feature of Boosting allows it to handle difficult classification tasks?", | |
| "options": [ | |
| "Ignoring residuals", | |
| "Parallel averaging of trees", | |
| "Sequential focus on misclassified instances", | |
| "Random feature selection only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Boosting emphasizes errors from previous learners, improving accuracy on difficult cases." | |
| }, | |
| { | |
| "id": 85, | |
| "questionText": "How can noisy labels in classification affect Boosting?", | |
| "options": [ | |
| "Noise improves accuracy", | |
| "Residuals are unaffected", | |
| "Boosting ignores noisy labels automatically", | |
| "Learners may focus on noise, causing overfitting" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Boosting gives higher weights to misclassified points, which can include noisy labels, leading to overfitting." | |
| }, | |
| { | |
| "id": 86, | |
| "questionText": "A Gradient Boosting model takes very long to train. Which strategy improves efficiency?", | |
| "options": [ | |
| "Increase tree depth", | |
| "Reduce learning rate and use subsampling (stochastic GB)", | |
| "Add more features", | |
| "Use only one tree" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Subsampling data or features and using stochastic gradient boosting improves computational efficiency." | |
| }, | |
| { | |
| "id": 87, | |
| "questionText": "In XGBoost, regularization parameters control:", | |
| "options": [ | |
| "Learning rate only", | |
| "Model complexity and overfitting", | |
| "Subsampling only", | |
| "Tree depth only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "L1/L2 regularization penalizes complex models, reducing overfitting." | |
| }, | |
| { | |
| "id": 88, | |
| "questionText": "Which technique improves generalization in Boosting?", | |
| "options": [ | |
| "Increasing learning rate", | |
| "Adding very deep trees only", | |
| "Ignoring residuals", | |
| "Stochastic sampling of data and features" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Randomly sampling rows and features introduces diversity and reduces correlation among learners." | |
| }, | |
| { | |
| "id": 89, | |
| "questionText": "Which problem type is Boosting less suited for?", | |
| "options": [ | |
| "Extremely noisy datasets", | |
| "Structured regression", | |
| "Fraud detection", | |
| "Binary classification" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Boosting may overfit on extremely noisy data because it focuses on correcting previous errors." | |
| }, | |
| { | |
| "id": 90, | |
| "questionText": "Why is learning rate critical in Boosting?", | |
| "options": [ | |
| "Reduces dataset size", | |
| "Controls incremental contribution of each learner", | |
| "Increases tree depth automatically", | |
| "Selects features randomly" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Learning rate scales each learner’s impact, affecting convergence and overfitting." | |
| }, | |
| { | |
| "id": 91, | |
| "questionText": "Which parameter combination often requires tuning in real-world Boosting tasks?", | |
| "options": [ | |
| "Bootstrap fraction only", | |
| "Number of features only", | |
| "Learning rate, number of trees, tree depth, subsample fraction", | |
| "Random seed only" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Balancing these parameters is crucial for model performance and generalization." | |
| }, | |
| { | |
| "id": 92, | |
| "questionText": "Boosting can achieve better results than Bagging when:", | |
| "options": [ | |
| "Variance is low", | |
| "Data is perfectly clean", | |
| "Only linear models are used", | |
| "Bias is high and sequential error correction is needed" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Boosting reduces bias by focusing on errors sequentially, while Bagging primarily reduces variance." | |
| }, | |
| { | |
| "id": 93, | |
| "questionText": "A Gradient Boosting model predicts housing prices poorly on unseen data. Likely reason?", | |
| "options": [ | |
| "Learning rate too low only", | |
| "Bias too low", | |
| "Training data too small only", | |
| "Overfitting due to deep trees or high learning rate" | |
| ], | |
| "correctAnswerIndex": 3, | |
| "explanation": "Overfitting arises when trees are too deep or learning rate too high, harming generalization." | |
| }, | |
| { | |
| "id": 94, | |
| "questionText": "Scenario: Boosting model for fraud detection shows high accuracy but low recall. What to improve?", | |
| "options": [ | |
| "Use very high learning rate", | |
| "Increase tree depth only", | |
| "Adjust class weights or sampling to focus on minority class", | |
| "Reduce number of trees" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Class imbalance can cause Boosting to favor majority class; weighting or sampling helps improve recall." | |
| }, | |
| { | |
| "id": 95, | |
| "questionText": "Which real-world application suits XGBoost the most?", | |
| "options": [ | |
| "Predicting customer churn", | |
| "Image generation", | |
| "Clustering retail products", | |
| "PCA for dimensionality reduction" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "XGBoost excels at structured data problems like churn prediction due to high accuracy and handling of complex patterns." | |
| }, | |
| { | |
| "id": 96, | |
| "questionText": "Scenario: Boosting is overfitting on noisy data. Recommended fix?", | |
| "options": [ | |
| "Increase number of trees only", | |
| "Increase tree depth", | |
| "Reduce learning rate, shallow trees, early stopping", | |
| "Ignore residuals" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Controlling model complexity and learning rate helps reduce overfitting on noisy data." | |
| }, | |
| { | |
| "id": 97, | |
| "questionText": "Which is a main strength of Boosting over Bagging?", | |
| "options": [ | |
| "Always faster", | |
| "Reduces bias via sequential error correction", | |
| "No need to tune parameters", | |
| "Reduces variance only" | |
| ], | |
| "correctAnswerIndex": 1, | |
| "explanation": "Boosting sequentially reduces bias by focusing on previous errors, while Bagging mainly reduces variance." | |
| }, | |
| { | |
| "id": 98, | |
| "questionText": "Scenario: Using Boosting for medical diagnosis with class imbalance. Best strategy?", | |
| "options": [ | |
| "Use class weighting or SMOTE with Boosting", | |
| "Use default parameters only", | |
| "Ignore minority class", | |
| "Reduce number of trees" | |
| ], | |
| "correctAnswerIndex": 0, | |
| "explanation": "Balancing classes ensures minority class predictions are accurate in Boosting models." | |
| }, | |
| { | |
| "id": 99, | |
| "questionText": "Why does XGBoost often outperform traditional Gradient Boosting?", | |
| "options": [ | |
| "Only deeper trees", | |
| "Only more trees", | |
| "Regularization, parallelization, and optimized tree learning", | |
| "Only higher learning rate" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "XGBoost includes computational optimizations and regularization techniques, improving performance and generalization." | |
| }, | |
| { | |
| "id": 100, | |
| "questionText": "Scenario: Boosting for credit risk classification performs poorly. Which strategy helps?", | |
| "options": [ | |
| "High learning rate only", | |
| "Increase number of trees only", | |
| "Feature engineering, handling class imbalance, tuning learning rate and tree depth", | |
| "Ignore residuals" | |
| ], | |
| "correctAnswerIndex": 2, | |
| "explanation": "Careful feature engineering, class balancing, and parameter tuning are critical for high-performing Boosting models." | |
| } | |
| ] | |
| } | |
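
The questions above repeatedly reference decision stumps, learning rate (shrinkage), early stopping, subsampling, and tree depth. The sketch below is a minimal companion example showing how those knobs might look in practice; it assumes scikit-learn (>= 1.2) and a synthetic dataset, and none of the class names, parameter values, or data are part of the question bank itself.

```python
# Illustrative sketch only (assumes scikit-learn >= 1.2); the parameter values
# are examples of the knobs discussed in the questions, not recommendations.
from sklearn.datasets import make_classification
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Synthetic, mildly imbalanced binary classification data (hypothetical).
X, y = make_classification(n_samples=4000, n_features=20, n_informative=10,
                           weights=[0.9, 0.1], random_state=0)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=0)

# AdaBoost with decision stumps (depth-1 trees), as in questions 23 and 40.
# In scikit-learn < 1.2 the argument is named base_estimator instead of estimator.
ada = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=200,
    learning_rate=0.5,
    random_state=0,
)
ada.fit(X_train, y_train)
print("AdaBoost test accuracy:", ada.score(X_test, y_test))

# Gradient Boosting with shrinkage (low learning rate), shallow trees,
# row subsampling (stochastic gradient boosting), and early stopping --
# the overfitting controls referenced in questions 21, 30, 37, 47, and 50.
gbm = GradientBoostingClassifier(
    n_estimators=1000,          # upper bound; early stopping may use fewer
    learning_rate=0.05,         # shrinkage
    max_depth=3,                # limit tree complexity
    subsample=0.8,              # stochastic gradient boosting
    validation_fraction=0.2,    # held-out split used for early stopping
    n_iter_no_change=10,        # stop when validation score stops improving
    random_state=0,
)
gbm.fit(X_train, y_train)
print("Gradient Boosting test accuracy:", gbm.score(X_test, y_test))
print("Trees actually kept after early stopping:", gbm.n_estimators_)
```

Lowering `learning_rate` while raising `n_estimators`, and letting `n_iter_no_change` cut training short, mirrors the learning-rate/tree-count trade-off described in questions 38 and 42.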