#!/usr/bin/env python3

"""Another SSCCE attempt."""

import random

from xgboost.sklearn import XGBClassifier
import pandas
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit


def replicate():
    """
    Run a single randomized hyper-parameter search (another SSCCE attempt).

    Reads the training features from 'sdJ-X_train.csv' and the search
    space from the file 'random_grid_str', then fits a RandomizedSearchCV
    around an XGBClassifier, scored by ROC AUC over five time-series
    splits.  Nothing is returned; any exception raised while loading the
    inputs or fitting propagates to the caller.

    This one is from the level of:
    File: domain/classifier/factories/imp/xgb_factory.py
    Class: XGBFactory
    Method: validate_and_parse_parameters
    """
    # NOTE(review): this seeds only the stdlib `random` module; no
    # random_state is handed to RandomizedSearchCV below -- confirm whether
    # the search itself is meant to be reproducible.
    random.seed(0)

    X_train = pandas.read_csv('sdJ-X_train.csv')
    # 44 labels alternating 1.0 / 0.0; presumably the CSV has 44 rows too
    # -- TODO confirm.
    y_train = [1.0, 0.0] * 22

    xgb_params = {
        'base_score': 0.5,
        'booster': 'gbtree',
        'colsample_bylevel': 1,
        'colsample_bynode': 1,
        'colsample_bytree': 1,
        'gamma': 0,
        'learning_rate': 0.1,
        'max_delta_step': 0,
        'max_depth': 3,
        'min_child_weight': 1,
        'missing': None,
        'n_estimators': 100,
        'n_jobs': 1,
        'nthread': 1,
        'objective': 'binary:logistic',
        'random_state': 42,
        'reg_alpha': 0,
        'reg_lambda': 1,
        'scale_pos_weight': 1,
        'seed': None,
        'silent': True,
        'subsample': 1,
        'verbosity': 1,
    }
    estimator = XGBClassifier(**xgb_params)

    with open('random_grid_str', 'r') as file_:
        random_grid_str = file_.read()
    # SECURITY: eval() executes whatever the file contains; only run this
    # against a grid file you trust.  eval() also resolves names in this
    # function's scope, so the locals defined above keep their original
    # names in case the grid text refers to them.
    random_grid = eval(random_grid_str)

    search_options = {
        'estimator': estimator,
        'param_distributions': random_grid,
        'n_iter': 20,
        'scoring': 'roc_auc',
        # Pre-generated (train, test) index pairs from five ordered splits.
        'cv': TimeSeriesSplit(n_splits=5).split(X_train, y_train),
        'verbose': 0,
        'n_jobs': -1,
        'return_train_score': True,
    }
    RandomizedSearchCV(**search_options).fit(X_train, y_train)


def summary(good, bad):
    """
    Output a summary of goods vs bads.

    Prints a single line holding both counts and the share of good runs
    as a percentage with one decimal place.

    Args:
        good: Number of runs counted as good.
        bad: Number of runs counted as bad.

    If both counts are zero the share is reported as 0.0% rather than
    raising ZeroDivisionError.
    """
    total = good + bad
    # Guard the "nothing has run yet" case so the division cannot fail.
    percent_good = good * 100.0 / total if total else 0.0
    print(
        'Ran well {} times, ran badly {} times, AKA {:.1f}% well'.format(
            good, bad, percent_good,
        )
    )

    
def main():
    """
    Try to replicate.

    Calls replicate() 100 times.  A run that raises ValueError is tallied
    as bad, any run that returns normally as good; other exceptions are
    not caught and abort the loop.  A running summary is printed after
    every run, followed by 'Done' once all runs have finished.
    """
    tally = {'good': 0, 'bad': 0}
    for _ in range(100):
        try:
            replicate()
        except ValueError:
            outcome = 'bad'
        else:
            outcome = 'good'
        tally[outcome] += 1
        summary(good=tally['good'], bad=tally['bad'])
    print('Done')


# Run the replication loop only when executed as a script, so importing
# this module does not kick off 100 model searches as a side effect.
if __name__ == '__main__':
    main()