@@ -15,9 +15,9 @@ def neuralNetwork(input_layer_size,hidden_layer_size,out_put_layer):
1515 X = data_img ['X' ]
1616 y = data_img ['y' ]
1717
18- ''' scaler = StandardScaler()
18+ scaler = StandardScaler ()
1919 scaler .fit (X )
20- X = scaler.transform(X)'''
20+ X = scaler .transform (X )
2121
2222 m ,n = X .shape
2323 """digits = datasets.load_digits()
@@ -44,13 +44,14 @@ def neuralNetwork(input_layer_size,hidden_layer_size,out_put_layer):
4444 #np.savetxt("testTheta.csv",initial_nn_params,delimiter=",")
4545 start = time .time ()
4646 result = optimize .fmin_cg (nnCostFunction , initial_nn_params , fprime = nnGradient , args = (input_layer_size ,hidden_layer_size ,out_put_layer ,X ,y ,Lambda ))
47- print time .time ()- start
47+ print '执行时间:' , time .time ()- start
4848 print result
4949 '''可视化 Theta1'''
5050 length = result .shape [0 ]
5151 Theta1 = result [0 :hidden_layer_size * (input_layer_size + 1 )].reshape (hidden_layer_size ,input_layer_size + 1 )
5252 Theta2 = result [hidden_layer_size * (input_layer_size + 1 ):length ].reshape (out_put_layer ,hidden_layer_size + 1 )
5353 display_data (Theta1 [:,1 :length ])
54+ display_data (Theta2 [:,1 :length ])
5455 '''预测'''
5556 p = predict (Theta1 ,Theta2 ,X )
5657 print u"预测准确度为:%f%%" % np .mean (np .float64 (p == y .reshape (- 1 ,1 ))* 100 )
@@ -89,7 +90,8 @@ def display_data(imgData):
8990
9091# 代价函数
9192def nnCostFunction (nn_params ,input_layer_size ,hidden_layer_size ,num_labels ,X ,y ,Lambda ):
92- length = nn_params .shape [0 ]
93+ length = nn_params .shape [0 ] # theta的中长度
94+ # 还原theta1和theta2
9395 Theta1 = nn_params [0 :hidden_layer_size * (input_layer_size + 1 )].reshape (hidden_layer_size ,input_layer_size + 1 )
9496 Theta2 = nn_params [hidden_layer_size * (input_layer_size + 1 ):length ].reshape (num_labels ,hidden_layer_size + 1 )
9597
@@ -101,17 +103,17 @@ def nnCostFunction(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,L
101103 for i in range (num_labels ):
102104 class_y [:,i ] = np .int32 (y == i ).reshape (1 ,- 1 ) # 注意reshape(1,-1)才可以赋值
103105
104-
106+ '''去掉theta1和theta2的第一列,因为正则化时从1开始'''
105107 Theta1_colCount = Theta1 .shape [1 ]
106108 Theta1_x = Theta1 [:,1 :Theta1_colCount ]
107109 Theta2_colCount = Theta2 .shape [1 ]
108110 Theta2_x = Theta2 [:,1 :Theta2_colCount ]
109-
111+ # 正则化项theta^2
110112 term = np .dot (np .transpose (np .vstack ((Theta1_x .reshape (- 1 ,1 ),Theta2_x .reshape (- 1 ,1 )))),np .vstack ((Theta1_x .reshape (- 1 ,1 ),Theta2_x .reshape (- 1 ,1 ))))
111113
112- '''正向传播'''
113- a1 = np .hstack ((np .ones ((m ,1 )),X ))
114- z2 = np .dot (a1 ,np .transpose (Theta1 ))
114+ '''正向传播,每次需要补上一列1的偏置bias '''
115+ a1 = np .hstack ((np .ones ((m ,1 )),X ))
116+ z2 = np .dot (a1 ,np .transpose (Theta1 ))
115117 a2 = sigmoid (z2 )
116118 a2 = np .hstack ((np .ones ((m ,1 )),a2 ))
117119 z3 = np .dot (a2 ,np .transpose (Theta2 ))
@@ -132,26 +134,26 @@ def nnGradient(nn_params,input_layer_size,hidden_layer_size,num_labels,X,y,Lambd
132134 for i in range (num_labels ):
133135 class_y [:,i ] = np .int32 (y == i ).reshape (1 ,- 1 ) # 注意reshape(1,-1)才可以赋值
134136
135-
137+ '''去掉theta1和theta2的第一列,因为正则化时从1开始'''
136138 Theta1_colCount = Theta1 .shape [1 ]
137139 Theta1_x = Theta1 [:,1 :Theta1_colCount ]
138140 Theta2_colCount = Theta2 .shape [1 ]
139141 Theta2_x = Theta2 [:,1 :Theta2_colCount ]
140142
141- Theta1_grad = np .zeros ((Theta1 .shape ))
142- Theta2_grad = np .zeros ((Theta2 .shape ))
143+ Theta1_grad = np .zeros ((Theta1 .shape )) #第一层到第二层的权重
144+ Theta2_grad = np .zeros ((Theta2 .shape )) #第二层到第三层的权重
143145
144146 Theta1 [:,0 ] = 0 ;
145147 Theta2 [:,0 ] = 0 ;
146- '''正向传播'''
148+ '''正向传播,每次需要补上一列1的偏置bias '''
147149 a1 = np .hstack ((np .ones ((m ,1 )),X ))
148150 z2 = np .dot (a1 ,np .transpose (Theta1 ))
149151 a2 = sigmoid (z2 )
150152 a2 = np .hstack ((np .ones ((m ,1 )),a2 ))
151153 z3 = np .dot (a2 ,np .transpose (Theta2 ))
152154 h = sigmoid (z3 )
153155
154- '''反向传播'''
156+ '''反向传播,delta为误差, '''
155157 delta3 = np .zeros ((m ,num_labels ))
156158 delta2 = np .zeros ((m ,hidden_layer_size ))
157159 for i in range (m ):
@@ -178,14 +180,15 @@ def sigmoidGradient(z):
178180
179181# 随机初始化权重theta
def randInitializeWeights(L_in, L_out):
    """Return a randomly initialised weight matrix for one network layer.

    Each entry is drawn uniformly from [-epsilon_init, epsilon_init), where
    epsilon_init = sqrt(6 / (L_in + L_out)).

    Parameters:
        L_in: number of units feeding into the layer; one extra column is
            added to the result (used elsewhere in this file for the bias
            unit that is prepended during forward propagation).
        L_out: number of units in the layer being fed.

    Returns:
        numpy array of shape (L_out, 1 + L_in).
    """
    # The scale depends only on the layer sizes, so compute it once.
    epsilon_init = (6.0 / (L_out + L_in)) ** 0.5
    # np.random.rand yields values in [0, 1); stretch and shift them to
    # [-epsilon_init, epsilon_init). No zero pre-allocation is needed since
    # the random matrix is the complete result.
    return np.random.rand(L_out, 1 + L_in) * 2 * epsilon_init - epsilon_init
185187
186188
187189# 检验梯度是否计算正确
188190def checkGradient (Lambda = 0 ):
191+ '''构造一个小型的神经网络验证,因为数值法计算梯度很浪费时间,而且验证正确后之后就不再需要验证了'''
189192 input_layer_size = 3
190193 hidden_layer_size = 5
191194 num_labels = 3
@@ -197,9 +200,10 @@ def checkGradient(Lambda = 0):
197200
198201 y = y .reshape (- 1 ,1 )
199202 nn_params = np .vstack ((initial_Theta1 .reshape (- 1 ,1 ),initial_Theta2 .reshape (- 1 ,1 ))) #展开theta
203+ '''BP求出梯度'''
200204 grad = nnGradient (nn_params , input_layer_size , hidden_layer_size ,
201- num_labels , X , y , Lambda )
202-
205+ num_labels , X , y , Lambda )
206+ '''使用数值法计算梯度'''
203207 num_grad = np .zeros ((nn_params .shape [0 ]))
204208 step = np .zeros ((nn_params .shape [0 ]))
205209 e = 1e-4
@@ -213,6 +217,7 @@ def checkGradient(Lambda = 0):
213217 Lambda )
214218 num_grad [i ] = (loss2 - loss1 )/ (2 * e )
215219 step [i ]= 0
220+ # 显示两列比较
216221 res = np .hstack ((num_grad .reshape (- 1 ,1 ),grad .reshape (- 1 ,1 )))
217222 print res
218223
@@ -228,12 +233,12 @@ def predict(Theta1,Theta2,X):
228233 m = X .shape [0 ]
229234 num_labels = Theta2 .shape [0 ]
230235 #p = np.zeros((m,1))
236+ '''正向传播,预测结果'''
231237 X = np .hstack ((np .ones ((m ,1 )),X ))
232238 h1 = sigmoid (np .dot (X ,np .transpose (Theta1 )))
233239 h1 = np .hstack ((np .ones ((m ,1 )),h1 ))
234240 h2 = sigmoid (np .dot (h1 ,np .transpose (Theta2 )))
235241
236-
237242 '''
238243 返回h中每一行最大值所在的列号
239244 - np.max(h, axis=1)返回h中每一行的最大值(是某个数字的最大概率)
@@ -247,5 +252,5 @@ def predict(Theta1,Theta2,X):
247252 return p
248253
if __name__ == "__main__":
    # Script entry point: train and evaluate the network with 400 input
    # units, 25 hidden units and 10 output units. To verify the
    # backpropagation gradient against the numerical one instead, call
    # checkGradient() here.
    neuralNetwork(400, 25, 10)
0 commit comments