Skip to content

Commit e179a6d

Browse files
committed
逻辑回归 多分类
1 parent 438e96c commit e179a6d

4 files changed

Lines changed: 5124 additions & 1 deletion

File tree

LogisticRegression/LogisticRegression.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def gradient(initial_theta,X,y,inital_lambda):
9292
theta1[0] = 0
9393

9494
grad = np.dot(np.transpose(X),h-y)/m+inital_lambda/m*theta1 #正则化的梯度
95-
return grad
95+
return grad
9696

9797
# S型函数
9898
def sigmoid(z):
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
#-*- coding: utf-8 -*-
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
import scipy.io as spio
5+
from scipy import optimize
6+
from matplotlib.font_manager import FontProperties
7+
font = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=14) # 解决windows环境下画图汉字乱码问题
8+
9+
10+
def logisticRegression_OneVsAll():
    """Train a one-vs-all regularized logistic-regression classifier on the
    20x20px handwritten-digit data set and report training-set accuracy.

    Side effects: loads ``data_digits.mat`` from the working directory,
    pops up a figure of 100 random digits, and prints the accuracy.
    """
    data = loadmat_data("data_digits.mat")
    X = data['X']   # each row is one digit image, 20x20px flattened to 400 values
    y = data['y']
    m, n = X.shape
    num_labels = 10  # digits 0-9

    # Pick 100 random row indices (with replacement) and show them as a grid.
    # The old `[np.random.randint(x-x, m) for x in range(100)]` was an
    # obfuscated way of writing exactly this (x-x is always 0).
    rand_indices = np.random.randint(0, m, 100)
    display_data(X[rand_indices, :])

    Lambda = 0.1  # regularization strength
    all_theta = oneVsAll(X, y, num_labels, Lambda)  # one fitted theta row per class

    p = predict_oneVsAll(all_theta, X)  # predicted label for every training row

    # print() with a single argument behaves the same on Python 2 and 3;
    # the old `print u"..."` statement is a SyntaxError on Python 3.
    print(u"预测准确度为:%f%%" % np.mean(np.float64(p == y.reshape(-1,1))*100))
30+
31+
# Load a MATLAB .mat file.
def loadmat_data(fileName):
    """Read *fileName* with scipy.io.loadmat and return the variable dict."""
    mat_contents = spio.loadmat(fileName)
    return mat_contents
34+
35+
# 显示10个数字
36+
def display_data(imgData):
37+
sum = 0
38+
display_array = np.ones((200,200))
39+
for i in range(10):
40+
for j in range(10):
41+
display_array[i*20:(i+1)*20,j*20:(j+1)*20] = imgData[sum,:].reshape(20,20)
42+
sum += 1
43+
44+
plt.imshow(display_array,cmap='gray')
45+
plt.axis('off')
46+
plt.show()
47+
48+
# Fit one regularized logistic-regression parameter vector per class.
def oneVsAll(X, y, num_labels, Lambda):
    """Train ``num_labels`` binary "class k vs. rest" classifiers.

    Returns an array of shape (num_labels, n+1): row k is the fitted theta
    (including intercept) for class k.
    """
    m, n = X.shape
    theta_matrix = np.zeros((n+1, num_labels))  # one column per class while fitting
    X = np.hstack((np.ones((m, 1)), X))         # prepend the intercept column
    class_y = np.zeros((m, num_labels))
    initial_theta = np.zeros((n+1, 1))

    # One-hot encode the labels: column k is 1 exactly where y == k.
    for k in range(num_labels):
        class_y[:, k] = np.int32(y == k).reshape(1, -1)

    # Minimize each binary problem's regularized cost with BFGS,
    # starting every class from the same zero vector.
    for k in range(num_labels):
        fitted = optimize.fmin_bfgs(costFunction, initial_theta, fprime=gradient,
                                    args=(X, class_y[:, k], Lambda))
        theta_matrix[:, k] = fitted.reshape(1, -1)

    return np.transpose(theta_matrix)
68+
69+
# Regularized logistic-regression cost.
def costFunction(initial_theta, X, y, inital_lambda):
    """Return the regularized cross-entropy cost of *initial_theta* on design
    matrix *X* with 0/1 labels *y*. The intercept (index 0) is not penalized.
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))  # model probabilities h(z)

    # Regularization skips j=0, so work on a copy with the bias zeroed out.
    theta1 = initial_theta.copy()
    theta1[0] = 0

    penalty = np.dot(np.transpose(theta1), theta1) * inital_lambda / 2
    neg_log_lik = -np.dot(np.transpose(y), np.log(h)) - np.dot(np.transpose(1-y), np.log(1-h))
    return (neg_log_lik + penalty) / m
81+
82+
# Gradient of the regularized logistic-regression cost.
def gradient(initial_theta, X, y, inital_lambda):
    """Return the gradient of ``costFunction`` w.r.t. theta; the intercept
    (index 0) contributes no regularization term.
    """
    m = len(y)
    h = sigmoid(np.dot(X, initial_theta))  # predicted probabilities

    # Copy theta and zero the bias so it is excluded from the penalty.
    theta1 = initial_theta.copy()
    theta1[0] = 0

    return np.dot(np.transpose(X), h - y) / m + inital_lambda / m * theta1
93+
94+
# Logistic (sigmoid) function.
def sigmoid(z):
    """Return 1/(1+e^-z), element-wise.

    Works for scalars as well as arrays: the old dead initialization
    ``h = np.zeros((len(z),1))`` raised TypeError on scalar input and its
    result was immediately overwritten anyway.
    """
    return 1.0 / (1.0 + np.exp(-z))
100+
101+
# Predict a label for every row of X with the fitted one-vs-all thetas.
def predict_oneVsAll(all_theta, X):
    """Return an (m, 1) array: for each row of X, the index of the class
    whose linear score ``X_aug . theta_k`` is largest.

    The sigmoid is strictly increasing, so argmax of the raw scores equals
    argmax of sigmoid(scores); skipping the sigmoid saves a full pass.
    ``np.argmax`` also replaces the old per-row ``np.where`` + ``vstack``
    loop, which recomputed ``np.max(h, axis=1)`` for every row (O(m^2))
    and produced a ragged stack whenever two classes tied for the maximum;
    ties now resolve deterministically to the first maximum.
    """
    m = X.shape[0]
    X = np.hstack((np.ones((m, 1)), X))  # prepend the intercept column

    scores = np.dot(X, np.transpose(all_theta))  # (m, num_labels) class scores
    # Column index of each row's maximum is the predicted label.
    p = np.argmax(scores, axis=1).reshape(-1, 1)
    return p
120+
121+
122+
def _main():
    # Script entry point: train the one-vs-all classifier and report accuracy.
    logisticRegression_OneVsAll()


if __name__ == "__main__":
    _main()

LogisticRegression/data_digits.mat

7.16 MB
Binary file not shown.

0 commit comments

Comments
 (0)