#!/usr/bin/python
# -*- coding: utf-8 -*-
# Author: XuMing <[email protected]>
# Date: 18/1/12
# Brief: XGBoost scikit-learn API examples: cross-validated classification and
#        regression, grid search, pickling and early stopping.
'''
Created on 1 Apr 2015
@author: Jamie Hall
'''
import pickle
import xgboost as xgb
import numpy as np
from sklearn.model_selection import KFold, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix, mean_squared_error
from sklearn.datasets import load_iris, load_digits, load_boston
rng = np.random.RandomState(31337)
print("Zeros and Ones from the Digits dataset: binary classification")
digits = load_digits(n_class=2)
y = digits['target']
X = digits['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))
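# (Sketch, not in the original script) accuracy for the last fold can be read
# off the confusion matrix: correct predictions sit on the diagonal.
cm = confusion_matrix(actuals, predictions)
print("last-fold accuracy: %.3f" % (np.trace(cm) / cm.sum()))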
print("Iris: multiclass classification")
iris = load_iris()
y = iris['target']
X = iris['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBClassifier().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(confusion_matrix(actuals, predictions))
print("Boston Housing: regression")
boston = load_boston()
y = boston['target']
X = boston['data']
kf = KFold(n_splits=2, shuffle=True, random_state=rng)
for train_index, test_index in kf.split(X):
    xgb_model = xgb.XGBRegressor().fit(X[train_index], y[train_index])
    predictions = xgb_model.predict(X[test_index])
    actuals = y[test_index]
    print(mean_squared_error(actuals, predictions))
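# (Sketch, not in the original script) RMSE on the last fold is in the same
# units as the target, which is often easier to read than raw MSE.
print("last-fold RMSE: %.3f" % np.sqrt(mean_squared_error(actuals, predictions)))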
print("Parameter optimization")
y = boston['target']
X = boston['data']
xgb_model = xgb.XGBRegressor()
clf = GridSearchCV(xgb_model,
                   {'max_depth': [2, 4, 6],
                    'n_estimators': [50, 100, 200]},
                   verbose=1)
clf.fit(X, y)
print(clf.best_score_)
print(clf.best_params_)
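# (Sketch, not in the original script) cv_results_ holds one entry per
# parameter combination tried; zip the settings with their mean CV scores.
for params, score in zip(clf.cv_results_['params'],
                         clf.cv_results_['mean_test_score']):
    print(params, score)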
# The sklearn API models are picklable
print("Pickling sklearn API models")
# must open in binary format to pickle
pickle.dump(clf, open("best_boston.pkl", "wb"))
clf2 = pickle.load(open("best_boston.pkl", "rb"))
print(np.allclose(clf.predict(X), clf2.predict(X)))
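# (Sketch, not in the original script; assumes the joblib package is installed,
# as it is alongside recent scikit-learn) joblib is often preferred over pickle
# for estimators holding large numpy arrays.
from joblib import dump, load
dump(clf, "best_boston.joblib")
clf3 = load("best_boston.joblib")
print(np.allclose(clf.predict(X), clf3.predict(X)))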
# Early-stopping
X = digits['data']
y = digits['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = xgb.XGBClassifier()
clf.fit(X_train, y_train, early_stopping_rounds=10, eval_metric="auc",
        eval_set=[(X_test, y_test)])
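# (Sketch, not in the original script) with early stopping, the sklearn wrapper
# records the best round; these attribute names are assumed for this xgboost version.
print("best AUC: %s at iteration %s" % (clf.best_score, clf.best_iteration))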