MLPythonInAction_CodeNote/Chapter20BostonHousePrice.py at master · renxingkai/MLPythonInAction_CodeNote

255 lines (218 loc) · 7.15 KB
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 15 10:36:53 2018

@author: Administrator
"""
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_squared_error
# Settings shared by the cross-validation runs in this script.
# 'neg_mean_squared_error' is scikit-learn's negated-MSE scorer, so values
# closer to zero are better.
scoring = 'neg_mean_squared_error'
num_folds = 10

# Load the Boston house-price data as a feature matrix and a target vector.
# NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm the
# installed version still provides it.
datasets = load_boston()
X, y = datasets.data, datasets.target
print(X.shape)
print(y.shape)

# Hold out 20% of the samples for the final evaluation; the fixed
# random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
# Spot-check three linear and three non-linear algorithms with their
# default hyper-parameters:
#   linear:     LR, LASSO, EN
#   non-linear: CART, SVM, KNN
models = {
    'LR': LinearRegression(),
    'LASSO': Lasso(),
    'EN': ElasticNet(),
    'KNN': KNeighborsRegressor(),
    'SVR': SVR(),
    'CART': DecisionTreeRegressor(),
}

# K-fold cross-validation splitter. random_state is intentionally not passed:
# it has no effect unless shuffle=True, and recent scikit-learn versions raise
# a ValueError for that combination.
kfold = KFold(n_splits=num_folds)

# Per-fold scores of each model, in the iteration order of `models`, so the
# box plot below lines up with its tick labels.
results = []
for key in models:
    cv_result = cross_val_score(models[key], X_train, y_train,
                                cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('%s:%f(%f)' % (key, cv_result.mean(), cv_result.std()))

# Compare the cross-validation score distributions of the six models.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(models.keys())
# Re-run the same six algorithms, this time standardizing the features
# inside a Pipeline so the scaler is fitted on each training fold only.
pipelines = {
    'ScalerLR': Pipeline([('Scaler', StandardScaler()), ('LR', LinearRegression())]),
    'ScalerLasso': Pipeline([('Scaler', StandardScaler()), ('LASSO', Lasso())]),
    'ScalerEN': Pipeline([('Scaler', StandardScaler()), ('EN', ElasticNet())]),
    'ScalerSVR': Pipeline([('Scaler', StandardScaler()), ('SVR', SVR())]),
    'ScalerKNN': Pipeline([('Scaler', StandardScaler()), ('KNN', KNeighborsRegressor())]),
    'ScalerCART': Pipeline([('Scaler', StandardScaler()), ('CART', DecisionTreeRegressor())]),
}

# random_state is intentionally not passed: it has no effect unless
# shuffle=True, and recent scikit-learn versions raise a ValueError for that
# combination.
kfold = KFold(n_splits=num_folds)

# Start from an empty list so the box plot holds exactly one box per pipeline
# instead of also carrying scores collected by an earlier comparison.
results = []
for key in pipelines:
    cv_result = cross_val_score(pipelines[key], X_train, y_train,
                                cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('机器学习单一算法%s:%f(%f)' % (key, cv_result.mean(), cv_result.std()))

# Compare the cross-validation score distributions of the scaled pipelines.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
# Label the boxes with the pipelines that were actually evaluated.
ax.set_xticklabels(pipelines.keys())
# Therefore the tuning effort focuses on KNN: grid-search the number of
# neighbours on standardized training features.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = {'n_neighbors': [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]}
model = KNeighborsRegressor()
# random_state is intentionally not passed: it has no effect unless
# shuffle=True, and recent scikit-learn versions raise a ValueError for that
# combination.
kfold = KFold(n_splits=num_folds)
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    scoring=scoring, cv=kfold)
grid_result = grid.fit(X=rescaledX, y=y_train)
print('最优参数:%s,获取分数:%s' % (grid_result.best_params_, grid_result.best_score_))

# Report mean and standard deviation of the score for every candidate.
cv_results = zip(grid_result.cv_results_['mean_test_score'],
                 grid_result.cv_results_['std_test_score'],
                 grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('%f (%f) with %r' % (mean, std, param))
# Compare ensemble regressors, each behind a StandardScaler.
# The AdaBoost base learner is passed positionally because its keyword was
# renamed from `base_estimator` to `estimator` across scikit-learn releases;
# the positional form works with both spellings.
ensembles = {
    'ScaledAB': Pipeline([('Scaler', StandardScaler()),
                          ('AB', AdaBoostRegressor())]),
    'ScaledAB-KNN': Pipeline([('Scaler', StandardScaler()),
                              ('ABKNN', AdaBoostRegressor(KNeighborsRegressor(n_neighbors=3)))]),
    'ScaledAB-LR': Pipeline([('Scaler', StandardScaler()),
                             ('ABLR', AdaBoostRegressor(LinearRegression()))]),
    'ScaledRFR': Pipeline([('Scaler', StandardScaler()),
                           ('RFR', RandomForestRegressor())]),
    'ScaledETR': Pipeline([('Scaler', StandardScaler()),
                           ('ETR', ExtraTreesRegressor())]),
    'ScaledGBR': Pipeline([('Scaler', StandardScaler()),
                           ('GBR', GradientBoostingRegressor())]),
}

# random_state is intentionally not passed: it has no effect unless
# shuffle=True, and recent scikit-learn versions raise a ValueError for that
# combination.
kfold = KFold(n_splits=num_folds)

# Start from an empty list so the box plot holds exactly one box per ensemble
# instead of also carrying scores collected by an earlier comparison.
results = []
for key in ensembles:
    cv_result = cross_val_score(ensembles[key], X_train, y_train,
                                cv=kfold, scoring=scoring)
    results.append(cv_result)
    print('集成算法%s:%f(%f)' % (key, cv_result.mean(), cv_result.std()))

# Compare the cross-validation score distributions of the ensembles.
fig = plt.figure()
fig.suptitle('Algorithm Comparison')
ax = fig.add_subplot(111)
plt.boxplot(results)
ax.set_xticklabels(ensembles.keys())
# Tune the GBM and ET algorithms; this section grid-searches the number of
# boosting stages for GradientBoostingRegressor on standardized features.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = {'n_estimators': [10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900]}
model = GradientBoostingRegressor()
# random_state is intentionally not passed: it has no effect unless
# shuffle=True, and recent scikit-learn versions raise a ValueError for that
# combination.
kfold = KFold(n_splits=num_folds)
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    scoring=scoring, cv=kfold)
grid_result = grid.fit(X=rescaledX, y=y_train)
print('最优参数:%s,获取分数:%s' % (grid_result.best_params_, grid_result.best_score_))

# Report mean and standard deviation of the score for every candidate.
cv_results = zip(grid_result.cv_results_['mean_test_score'],
                 grid_result.cv_results_['std_test_score'],
                 grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('GBM%f (%f) with %r' % (mean, std, param))
# Grid-search the number of trees for ExtraTreesRegressor on standardized
# training features.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
param_grid = {'n_estimators': [10, 50, 100, 200, 300, 400, 500, 600, 700, 800, 900]}
model = ExtraTreesRegressor()
# random_state is intentionally not passed: it has no effect unless
# shuffle=True, and recent scikit-learn versions raise a ValueError for that
# combination.
kfold = KFold(n_splits=num_folds)
grid = GridSearchCV(estimator=model, param_grid=param_grid,
                    scoring=scoring, cv=kfold)
grid_result = grid.fit(X=rescaledX, y=y_train)
print('最优参数:%s,获取分数:%s' % (grid_result.best_params_, grid_result.best_score_))

# Report mean and standard deviation of the score for every candidate.
cv_results = zip(grid_result.cv_results_['mean_test_score'],
                 grid_result.cv_results_['std_test_score'],
                 grid_result.cv_results_['params'])
for mean, std, param in cv_results:
    print('ET%f (%f) with %r' % (mean, std, param))
# Train the final model on the standardized training set and score it on the
# held-out test set.
scaler = StandardScaler().fit(X_train)
rescaledX = scaler.transform(X_train)
# NOTE: despite its name, `gbr` holds an ExtraTreesRegressor.
gbr = ExtraTreesRegressor(n_estimators=80)
gbr.fit(X=rescaledX, y=y_train)

# The model was fitted on standardized features, so the test features must go
# through the same fitted scaler before prediction.
rescaledX_validation = scaler.transform(X_test)
preditions = gbr.predict(rescaledX_validation)

# The printed value is the mean squared error (lower is better);
# arguments follow scikit-learn's (y_true, y_pred) order.
print('准确度:', mean_squared_error(y_test, preditions))
Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

Chapter20BostonHousePrice.py

Latest commit

History

Chapter20BostonHousePrice.py

File metadata and controls