-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathChapter18ParameterSelect.py
More file actions
99 lines (60 loc) · 1.8 KB
/
Chapter18ParameterSelect.py
File metadata and controls
99 lines (60 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 14 16:39:37 2018
@author: Administrator
"""
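'''
Hyperparameter tuning
Two algorithms for automatically searching for the best parameters:
    grid search parameter optimization
    random search parameter optimization
'''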
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import uniform
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV
# Load the data set.
filename = 'diabetes.csv'
# NOTE(review): `names` is never passed to read_csv, so the first row of the
# file is consumed as the header -- confirm that diabetes.csv has a header row.
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = pd.read_csv(filename)
# Split the table into input features (first 8 columns) and the target
# (9th column).
array = data.values
X = array[:, 0:8]
y = array[:, 8]
num_folds = 10
seed = 7
# shuffle=True is required for random_state to have any effect; recent
# scikit-learn releases raise ValueError when random_state is set on a
# non-shuffling KFold.
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=seed)
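'''
Grid search parameter optimization
Evaluates the algorithm on every value in a predefined list of parameters in order to find the best one.
Suitable when there are 3 or 4 hyperparameters (or fewer).
'''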
# Model whose regularization strength is being tuned.
model = Ridge()
# Candidate alpha values; grid search evaluates every one of them.
param_grid = {'alpha': [1, 0.1, 0.01, 0.001, 0]}
# Pass the module-level `kfold` splitter explicitly: without cv=..., the
# search falls back to the library default and the configured folds are
# silently ignored.
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=kfold)
grid.fit(X, y)
# Report the best mean cross-validated score and the alpha that produced it.
print('最高得分:%.3f' % grid.best_score_)
print('最优参数:%s' % grid.best_params_)
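'''
Random search parameter optimization
Suitable when there are many parameters.
Uses a fixed number of iterations, sampling candidate parameters from random distributions.
'''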
# Model whose regularization strength is being tuned.
model = Ridge()
# Sample alpha from scipy's default uniform distribution, i.e. the
# interval [0, 1].
param_grid = {'alpha': uniform()}
# Draw 100 random candidates. Pass the module-level `kfold` splitter
# explicitly: without cv=..., the search falls back to the library default
# and the configured folds are silently ignored.
grid = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    n_iter=100,
    cv=kfold,
    random_state=seed,
)
grid.fit(X, y)
# Report the best mean cross-validated score and the alpha that produced it.
print('最高得分:%.3f' % grid.best_score_)
print('最优参数:%s' % grid.best_params_)