XGBoost bayesian hyperparameter tuning with bayes_opt in Python

15th October 2016 · 2 min read

Hey guys,

I just wanted to quickly share how I've been optimizing XGBoost hyperparameters using bayes_opt.

from bayes_opt import BayesianOptimization
from sklearn.cross_validation import KFold  # newer scikit-learn versions moved this to sklearn.model_selection
import xgboost as xgb
import numpy as np

def xgbCv(train, features, numRounds, eta, gamma, maxDepth, minChildWeight, subsample, colSample):   
   # prepare xgb parameters 
   params = {
            "objective": "reg:linear",
            "booster" : "gbtree",
            "eval_metric": "mae",            
            "tree_method": 'auto',
            "silent": 1,
            "eta": eta, 
            "max_depth": int(maxDepth),
            "min_child_weight" : minChildWeight,
            "subsample": subsample, 
            "colsample_bytree": colSample,             
            "gamma": gamma
   }
   
   cvScore = kFoldValidation(train, features, params, int(numRounds), nFolds = 3)
   print('CV score: {:.6f}'.format(cvScore)) 
   return -1.0 * cvScore   # invert the cv score (lower MAE is better) to let bayes_opt maximize
   
def bayesOpt(train, features):
   ranges = {            
              'numRounds': (1000, 5000),
              'eta': (0.001, 0.3),
              'gamma': (0, 25),
              'maxDepth': (1, 10),
              'minChildWeight': (0, 10),
              'subsample': (0, 1),
              'colSample': (0, 1)
   }
   
   # proxy through a lambda to be able to pass train and features
   optFunc = lambda numRounds, eta, gamma, maxDepth, minChildWeight, subsample, colSample: xgbCv(train, features, numRounds, eta, gamma, maxDepth, minChildWeight, subsample, colSample)
   bo = BayesianOptimization(optFunc, ranges)
   bo.maximize(init_points = 50, n_iter = 5, kappa = 2, acq = "ei", xi = 0.0)
   
   bestMAE = round((-1.0 * bo.res['max']['max_val']), 6)
   print("\n Best MAE found: %f" % bestMAE)
   print("\n Parameters: %s" % bo.res['max']['max_params'])
 
def kFoldValidation(train, features, xgbParams, numRounds, nFolds, target='loss'):  
   kf = KFold(len(train), n_folds = nFolds, shuffle = True)
   fold_score=[]
   for train_index, cv_index in kf:
      # split train/validation
      X_train, X_valid = train[features].as_matrix()[train_index], train[features].as_matrix()[cv_index]
      y_train, y_valid = (train[target].as_matrix()[train_index]), (train[target].as_matrix()[cv_index])
      dtrain = xgb.DMatrix(X_train, y_train) 
      dvalid = xgb.DMatrix(X_valid, y_valid)
         
      watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
      gbm = xgb.train(xgbParams, dtrain, numRounds, evals = watchlist, early_stopping_rounds = 100)

      score = gbm.best_score  # eval MAE at the best iteration found by early stopping
      fold_score.append(score)                

   return np.mean(fold_score)

It runs a k-fold cross validation inside every optimization step, so the search optimizes for parameters that are stable across folds.
Keep in mind that bayes_opt maximizes the objective function, which is why the MAE gets negated before it is returned; adjust the hardcoded bits (objective, eval_metric, the 'loss' target column and that negation) to fit your problem. It's pretty compact, so I thought I'd just leave it here for your convenience as a gist.
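In case you want to run it end to end, here's a minimal sketch of how I'd wire it up. The file name and the id column are just assumptions for illustration - any pandas DataFrame with numeric feature columns and a numeric 'loss' target will do:

import pandas as pd

# hypothetical input file - substitute your own DataFrame with numeric features and a 'loss' target
train = pd.read_csv('train.csv')

# use everything except the id and the target as features
features = [c for c in train.columns if c not in ('id', 'loss')]

bayesOpt(train, features)

If you also want to retrain a final model on the whole training set with the winning parameters, the simplest change is to return bo (or just bo.res['max']['max_params']) from bayesOpt and feed those values back into xgb.train.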

Cheers,
Thomas

