#!/usr/bin/env python3
"""Another SSCCE attempt."""
import random

import pandas
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit
from xgboost.sklearn import XGBClassifier


def replicate():
    """
    Another SSCCE attempt.

    This one is from the level of:
        File: domain/classifier/factories/imp/xgb_factory.py
        Class: XGBFactory
        Method: validate_and_parse_parameters
    """
    random.seed(0)

    # Training data: features read from disk, alternating binary labels.
    X_train = pandas.read_csv('sdJ-X_train.csv')
    y_train = [1.0, 0.0] * 22

    estimator = XGBClassifier(
        base_score=0.5,
        booster='gbtree',
        colsample_bylevel=1,
        colsample_bynode=1,
        colsample_bytree=1,
        gamma=0,
        learning_rate=0.1,
        max_delta_step=0,
        max_depth=3,
        min_child_weight=1,
        missing=None,
        n_estimators=100,
        n_jobs=1,
        nthread=1,
        objective='binary:logistic',
        random_state=42,
        reg_alpha=0,
        reg_lambda=1,
        scale_pos_weight=1,
        seed=None,
        silent=True,
        subsample=1,
        verbosity=1,
    )

    # The parameter search space is stored as a Python literal in a separate
    # file and evaluated as-is.
    with open('random_grid_str', 'r') as file_:
        random_grid_str = file_.read()
    random_grid = eval(random_grid_str)

    n_iter = 20
    scoring = 'roc_auc'
    cv = 5
    # Time-ordered folds instead of shuffled K-fold cross-validation.
    ts_split = TimeSeriesSplit(n_splits=cv).split(X_train, y_train)
    verbose = 0
    n_jobs = -1
    ret_train_score = True

    clf_random = RandomizedSearchCV(
        estimator=estimator,
        param_distributions=random_grid,
        n_iter=n_iter,
        scoring=scoring,
        cv=ts_split,
        verbose=verbose,
        n_jobs=n_jobs,
        return_train_score=ret_train_score,
    )
    clf_random.fit(X_train, y_train)


def summary(good, bad):
    """Output a summary of goods vs bads."""
    print('Ran well {} times, ran badly {} times, AKA {:.1f}% well'.format(
        good, bad, good * 100.0 / (good + bad)))


def main():
    """Try to replicate."""
    bad = 0
    good = 0
    for rep in range(100):
        try:
            replicate()
        except ValueError:
            bad += 1
            summary(good=good, bad=bad)
        else:
            good += 1
            summary(good=good, bad=bad)
    print('Done')


if __name__ == '__main__':
    main()
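
# NOTE: the contents of the 'random_grid_str' file are not included with this
# script. It is assumed to hold a Python dict literal mapping XGBClassifier
# hyperparameter names to candidate values for RandomizedSearchCV. A purely
# hypothetical sketch of such a grid (names and values are illustrative, not
# taken from the original run) would be:
#
#     {'max_depth': [3, 5, 7, 10],
#      'n_estimators': [50, 100, 200],
#      'learning_rate': [0.01, 0.05, 0.1, 0.3],
#      'subsample': [0.6, 0.8, 1.0]}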