# forked from Ewenwan/Caffe-Python-Tutorial
# quantize.py -- weight quantization for Caffe models (410 lines, ~15 KB)
# -*- coding:utf-8 -*-
"""
聚类量化仅仅减少内存消耗,并不能减少计算量
在实际运行中,也必须通过聚类中心表将量化后权重值转换为32位的浮点数,
因此并不能在减少网络的实际运行内存,只是减少网络的内存消耗。
要真正减少网络内存消耗,从而达到网络实际运行速度的提高,目前有两类主流方法:
1、网络剪裁
2、量化
网络权重共享量化也是一类重要的网络压缩方法,
其本质在于先通过聚类方法得到该层权重的聚类中心,
然后通过聚类中心值来表示原权重值。
因此权重值并不是由32位的浮点数来表示,而是由其对应的聚类中心的序号表示,
如果聚类级别为8位,此时权重值只需要用8位就能表示。
对于网络权重量化也有三个问题:
量化级别的确定,同修剪率一样,可以通过试错的试验的方法来确定
量化后网络重新训练问题
量化中心的初始选择问题:聚类中心采用线性方法初始化,将初始点均匀分散,
这种初始化方法不仅操作简单,
而且能够将对网络影响较大但实际分布较少的较大权重值也包含到初始中心点中,
因此不容易造成较大权重的丢失。
"""
# 通过Kmeans聚类的方法来量化权重
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.vq as scv
import pickle
import os
os.environ['GLOG_minloglevel'] = '2'
import caffe
import time
# Build each layer's quantization codebook via K-means clustering
# (scipy.cluster.vq implementation).
def kmeans_net(net, layers, num_c=16, initials=None):
    """Cluster each layer's non-zero weights and return a per-layer codebook.

    Args:
        net: network whose ``net.params[layer][0].data`` holds the weights.
        layers: iterable of layer names to quantize.
        num_c: quantization level(s) -- a single int applied to every layer,
            or a sequence with one entry per layer.
        initials: initial cluster centers -- ``None`` for uniform (linear)
            initialization, an ``np.ndarray`` of explicit centers, or
            ``'random'`` for random initialization.

    Returns:
        dict mapping layer name -> 1-D array of cluster centers; entry 0 is
        always 0.0, the code reserved for pruned (zero) weights.
    """
    codebook = {}
    if isinstance(num_c, int):
        num_c = [num_c] * len(layers)
    else:
        assert len(num_c) == len(layers)

    print("==============Perform K-means=============")
    for idx, layer in enumerate(layers):
        print("Eval layer: %s" % layer)
        W = net.params[layer][0].data.flatten()
        W = W[np.where(W != 0)]  # pruned (zero) weights get their own code below

        if initials is None:  # default: centers spread linearly over [min, max]
            # num_c - 1 centers here; appending 0.0 below restores num_c total.
            # Linear init keeps rare-but-large weights represented.
            initial_uni = np.linspace(np.min(W), np.max(W), num_c[idx] - 1)
            codebook[layer], _ = scv.kmeans(W, initial_uni)
        elif isinstance(initials, np.ndarray):
            codebook[layer], _ = scv.kmeans(W, initials)
        elif initials == 'random':
            codebook[layer], _ = scv.kmeans(W, num_c[idx] - 1)
        else:
            raise ValueError("unsupported initials: %r" % (initials,))

        # Reserve code 0 for the zero (pruned) weights.
        codebook[layer] = np.append(0.0, codebook[layer])
        print("codebook size: %d" % len(codebook[layer]))
    return codebook
# Stochastic weight quantization: each weight is randomly rounded to the
# codebook entry just below or just above it, with probability proportional
# to the distance to the *other* neighbor.
def stochasitc_quantize2(W, codebook):
    """Return, for each weight in the 1-D array ``W``, the index of the
    codebook entry it is stochastically rounded to.

    Args:
        W: 1-D array of weights.
        codebook: 1-D array of quantization centers.

    Returns:
        int array of codebook indices, same length as ``W``.
    """
    # diff[i, j] = W[i] - codebook[j]; shape (len(W), len(codebook)).
    diff = W[:, np.newaxis] - codebook
    # Largest negative difference -> nearest codebook entry ABOVE the weight.
    # (Explicit copies: the original aliased one array for both masks, which
    # only worked by accident of the +/-99999 shifts cancelling.)
    mask_neg = diff.copy()
    mask_neg[mask_neg > 0.0] -= 99999.0  # push positives out of the running
    max_neg = np.max(mask_neg, axis=1)
    max_code = np.argmax(mask_neg, axis=1)
    # Smallest positive difference -> nearest codebook entry BELOW the weight.
    mask_pos = diff.copy()
    mask_pos[diff <= 0.0] += 99999.0  # push non-positives out of the running
    min_code = np.argmin(mask_pos, axis=1)
    min_pos = np.min(mask_pos, axis=1)
    rd = np.random.uniform(low=0.0, high=1.0, size=(len(W)))
    # P(round down) = dist_to_upper / (dist_to_lower + dist_to_upper).
    thresh = min_pos.astype(np.float32) / (min_pos - max_neg)
    max_idx = thresh < rd
    min_idx = thresh >= rd
    codes = np.zeros(W.shape)
    codes[max_idx] += min_code[max_idx]  # rounded down to the lower neighbor
    codes[min_idx] += max_code[min_idx]  # rounded up to the upper neighbor
    # np.int was removed in NumPy 1.24; plain int is the supported spelling.
    return codes.astype(int)
# Quantize the network's weights in place using precomputed codebooks.
def quantize_net(net, codebook):
    """Replace each layer's weights with their nearest codebook center.

    Args:
        net: network whose ``params[layer][0].data`` is overwritten in place.
        codebook: layer -> 1-D array of centers (from ``kmeans_net``).

    Returns:
        dict mapping layer name -> uint32 array of per-weight codebook
        indices, shaped like the layer's weights.
    """
    layers = codebook.keys()
    codes_W = {}
    print("================Perform quantization==============")
    for layer in layers:
        print("Quantize layer: %s" % layer)
        W = net.params[layer][0].data
        # Nearest-center assignment from the codebook.
        codes, _ = scv.vq(W.flatten(), codebook[layer])
        # codes = stochasitc_quantize2(W.flatten(), codebook[layer])  # stochastic alternative
        codes = np.reshape(codes, W.shape)
        codes_W[layer] = np.array(codes, dtype=np.uint32)
        # Write the quantized (center-valued) weights back into the net.
        W_q = np.reshape(codebook[layer][codes], W.shape)
        np.copyto(net.params[layer][0].data, W_q)
    return codes_W
# Quantize each layer in place and record which weight positions use each
# codebook entry (needed later for codebook gradient updates).
def quantize_net_with_dict(net, layers, codebook, use_stochastic=False, timing=False):
    """Quantize the given layers and build the code bookkeeping structures.

    Args:
        net: network whose ``params[layer][0].data`` is overwritten in place.
        layers: layer names to quantize.
        codebook: layer -> 1-D array of centers.
        use_stochastic: use stochastic rounding instead of nearest-center.
        timing: print elapsed time when True.

    Returns:
        (codeDict, maskCode): ``codeDict[layer]`` maps code -> list of flat
        weight indices using it; ``maskCode[layer]`` is the per-weight code
        array shaped like the layer's weights.
    """
    start_time = time.time()
    codeDict = {}  # code -> flat indices of the weights assigned to it
    maskCode = {}  # per-weight code assignment, in the weight's shape
    for layer in layers:
        print("Quantize layer: %s" % layer)
        W = net.params[layer][0].data
        if use_stochastic:
            codes = stochasitc_quantize2(W.flatten(), codebook[layer])
        else:
            codes, _ = scv.vq(W.flatten(), codebook[layer])
        # Write the quantized weights back into the net.
        W_q = np.reshape(codebook[layer][codes], W.shape)
        net.params[layer][0].data[...] = W_q

        maskCode[layer] = np.reshape(codes, W.shape)
        codeDict[layer] = {}
        for i, code in enumerate(maskCode[layer].flatten()):
            codeDict[layer].setdefault(code, []).append(i)
    if timing:
        print("Update codebook time:%f" % (time.time() - start_time))
    return codeDict, maskCode
def static_vars(**kwargs):
    """Decorator that attaches the given keyword arguments to the decorated
    function as attributes (C-style "static" variables)."""
    def decorate(func):
        for name, value in kwargs.items():
            setattr(func, name, value)
        return func
    return decorate
# Retraining step: update the codebook (cluster centers) from the gradients
# accumulated on the quantized weights; accuracy gradually recovers as
# iterations proceed.
@static_vars(step_cache={}, step_cache2={}, count=0)
def update_codebook_net(net, codebook, codeDict, maskCode, args, update_layers=None, snapshot=None):
    """Update each layer's codebook from the summed gradient of the weights
    assigned to each code, then write the re-quantized weights back.

    Args:
        net: network holding weights (``params[layer][0].data``) and their
            gradients (``params[layer][0].diff``).
        codebook: layer -> 1-D array of centers; updated in place.
        codeDict: layer -> {code: flat indices of weights using that code}.
        maskCode: layer -> per-weight code assignment array.
        args: dict with 'lr', 'decay_rate', 'momentum', 'update' (one of
            'sgd' / 'momentum' / 'rmsprop' / 'adadelta') and 'normalize_flag'.
        update_layers: layers whose codebooks are updated (None = all).
        snapshot: unused; kept for interface compatibility.
    """
    start_time = time.time()
    extra_lr = args['lr']                    # base learning rate
    decay_rate = args['decay_rate']          # decay rate
    momentum = args['momentum']              # momentum coefficient
    update_method = args['update']           # update rule
    smooth_eps = 0
    normalize_flag = args['normalize_flag']  # whether to normalize the update
    if update_method == 'rmsprop':
        extra_lr /= 100
    # One-time initialization of the per-code caches (function "statics").
    if update_codebook_net.count == 0:
        step_cache2 = update_codebook_net.step_cache2
        step_cache = update_codebook_net.step_cache
        if update_method == 'adadelta':
            for layer in update_layers:
                step_cache2[layer] = {}
                for code in range(1, len(codebook[layer])):
                    step_cache2[layer][code] = 0.0
            # NOTE(review): smooth_eps is only non-zero on the first call;
            # later adadelta calls run with smooth_eps == 0 -- confirm intended.
            smooth_eps = 1e-8
        for layer in update_layers:
            step_cache[layer] = {}
            for code in range(1, len(codebook[layer])):
                step_cache[layer][code] = 0.0
        update_codebook_net.count = 1
    else:
        # Reuse the caches accumulated by previous calls.
        step_cache2 = update_codebook_net.step_cache2
        step_cache = update_codebook_net.step_cache
        update_codebook_net.count += 1
    # All layer names; by default every layer is updated.
    total_layers = net.params.keys()
    if update_layers is None:
        update_layers = total_layers
    # Codebook update per layer.
    for layer in total_layers:
        if layer in update_layers:
            diff = net.params[layer][0].diff.flatten()  # weight gradients
            codeBookSize = len(codebook[layer])
            dx = np.zeros((codeBookSize))  # per-code codebook update
            # Code 0 is the pruned-weight code and is never updated.
            for code in range(1, codeBookSize):
                indexes = codeDict[layer][code]  # weights assigned to this code
                #diff_ave = np.sum(diff[indexes]) / len(indexes)
                diff_ave = np.sum(diff[indexes])  # summed gradient for this code
                # Apply the selected update rule.
                if update_method == 'sgd':
                    dx[code] = -extra_lr * diff_ave
                elif update_method == 'momentum':
                    if code in step_cache[layer]:
                        dx[code] = momentum * step_cache[layer][code] - (1 - momentum) * extra_lr * diff_ave
                        # BUGFIX: cache the scalar update for this code; the
                        # original stored the whole dx array, which breaks the
                        # momentum term on every subsequent call.
                        step_cache[layer][code] = dx[code]
                elif update_method == 'rmsprop':
                    if code in step_cache[layer]:
                        step_cache[layer][code] = decay_rate * step_cache[layer][code] + (1.0 - decay_rate) * diff_ave ** 2
                        dx[code] = -(extra_lr * diff_ave) / np.sqrt(step_cache[layer][code] + 1e-6)
                elif update_method == 'adadelta':
                    if code in step_cache[layer]:
                        step_cache[layer][code] = step_cache[layer][code] * decay_rate + (1.0 - decay_rate) * diff_ave ** 2
                        dx[code] = -np.sqrt((step_cache2[layer][code] + smooth_eps) / (step_cache[layer][code] + smooth_eps)) * diff_ave
                        step_cache2[layer][code] = step_cache2[layer][code] * decay_rate + (1.0 - decay_rate) * (dx[code] ** 2)
            # Optionally normalize the update by the RMS ratio of codebook to dx.
            if normalize_flag:
                codebook[layer] += extra_lr * np.sqrt(np.mean(codebook[layer] ** 2)) / np.sqrt(np.mean(dx ** 2)) * dx
            else:
                codebook[layer] += dx
        else:
            pass
        # maskCode holds each weight's code; re-expand codes to weight values.
        W2 = codebook[layer][maskCode[layer]]
        net.params[layer][0].data[...] = W2  # write quantized weights back
    print("Update codebook time:%f" % (time.time() - start_time))
# Persist the quantization results.
def store_all(net, codebook, dir_t, idx=0):
    """Save the quantized network (``caffemodel<idx>``) and its codebook
    (``codebook<idx>``, pickled) under ``dir_t``."""
    net.save(dir_t + 'caffemodel%d' % idx)
    # 'wb' + context manager: pickle requires binary mode on Python 3, and
    # the file handle is closed deterministically (the original leaked it).
    with open(dir_t + 'codebook%d' % idx, 'wb') as f:
        pickle.dump(codebook, f)
# Restore weights + codebook saved by store_all and rebuild bookkeeping.
def recover_all(net, dir_t, idx=0):
    """Load the saved caffemodel and pickled codebook, then re-derive the
    per-weight code assignments.

    Returns:
        (codebook, maskCode, codeDict): ``maskCode[layer]`` holds each weight
        position's code, ``codeDict[layer]`` maps each code to the flat
        positions using it.
    """
    layers = net.params.keys()
    net.copy_from(dir_t + 'caffemodel%d' % idx)
    # 'rb' + context manager: pickle must be read in binary mode on Python 3;
    # the original leaked the file handle.
    with open(dir_t + 'codebook%d' % idx, 'rb') as f:
        codebook = pickle.load(f)
    maskCode = {}
    codeDict = {}
    for layer in layers:
        W = net.params[layer][0].data
        # Nearest-center code for every weight.
        codes, _ = scv.vq(W.flatten(), codebook[layer])
        # Reshape codes back to the weight tensor's shape.
        maskCode[layer] = np.reshape(codes, W.shape)
        codeDict[layer] = {}
        for i, code in enumerate(maskCode[layer].flatten()):
            # codeDict: code -> positions; maskCode: position -> code.
            codeDict[layer].setdefault(code, []).append(i)
    return codebook, maskCode, codeDict
def analyze_log(fileName):
    """Read one float per line (the first whitespace-separated token) from
    the log file and return them as a list.

    The original left the file handle open; ``with`` closes it reliably.
    """
    with open(fileName, "r") as data:
        return [float(line.split()[0]) for line in data]
# Parse test results from the tail of a Caffe log.
def parse_caffe_log(log):
    """Return the last-token floats of the third- and second-to-last lines
    of ``log`` (Caffe's final accuracy/loss report), or [0.0, 0.0] if they
    cannot be parsed. A missing file still raises, as before."""
    with open(log) as f:  # close the handle (the original leaked it)
        lines = f.readlines()
    try:
        # List comprehension instead of map(): on Python 3 map is lazy, so
        # parse errors would escape this try/except entirely.
        res = [float(x.split()[-1]) for x in lines[-3:-1]]
    except Exception as e:
        print(e)
        res = [0.0, 0.0]
    return res
# 检测量化后网络的精度
def test_quantize_accu(test_net):
test_iter = 100
test_loss = 0
accuracy = 0
for test_it in range(test_iter):
# 进行一次测试
test_net.forward()
# 计算test loss
test_loss += test_net.blobs['loss'].data
# 计算test accuracy
accuracy += test_net.blobs['accuracy'].data
return (test_loss / test_iter), (accuracy / test_iter)
def save_quantize_net(codebook, maskcode, net_filename, total_layers):
    """Save each layer's codebook and per-weight codes to an ``.npz``
    archive under the keys '<layer>_codebook' and '<layer>_maskcode'."""
    quantizeNet = {}
    for layer in total_layers:
        quantizeNet[layer + '_codebook'] = np.float32(codebook[layer])
        # uint8, not int8: with 8-bit quantization (num_c = 256 in this file)
        # codes reach 255 and would overflow a signed byte.
        quantizeNet[layer + '_maskcode'] = np.uint8(maskcode[layer])
    # ``**`` stores each entry under its own key; passing the dict as a
    # positional arg would pickle it whole as a single 'arr_0' object array.
    np.savez(net_filename, **quantizeNet)
# Save the pruned + quantized network parameters in a run-length (CSC-like)
# format: non-zero codes plus the length of the zero run preceding each.
def save_pruned_quantize_net(codebook, maskcode, net_filename, total_layers):
    """Save per-layer codebooks and run-length-encoded codes to ``.npz``.

    Args:
        codebook: layer -> 1-D array of centers.
        maskcode: layer -> per-weight code array (0 = pruned weight).
        net_filename: output .npz path.
        total_layers: layer names to save.
    """
    quantizeNet = {}
    for layer in total_layers:
        # BUGFIX: read the ``maskcode`` parameter; the original referenced a
        # module-level ``maskCode`` global, making the parameter dead and the
        # function fail standalone.
        W_flatten = maskcode[layer].flatten()
        indx = 0
        num_level = 8  # quantization bit width
        csc_W = []
        csc_indx = []
        for n in range(len(W_flatten)):
            # Emit an entry for every non-zero code, or force one when the
            # zero-run counter saturates the index field.
            if W_flatten[n] != 0 or indx == 2 ** num_level:
                csc_W.append(W_flatten[n])
                csc_indx.append(indx)
                indx = 0
            else:
                indx += 1
        if indx != 0:
            # Trailing zero run.
            csc_W.append(0)
            csc_indx.append(indx - 1)
        print(max(csc_indx))
        quantizeNet[layer + '_codebook'] = np.float32(codebook[layer])
        # uint8, not int8: codes are codebook indices in [0, 255] for 8-bit
        # quantization; int8 would overflow for codes >= 128.
        quantizeNet[layer + '_maskcode_W'] = np.array(csc_W, dtype=np.uint8)
        # int16: run lengths can reach 2**num_level == 256, which does not
        # fit in a signed (or unsigned) byte.
        quantizeNet[layer + '_maskcode_indx'] = np.array(csc_indx, dtype=np.int16)
    # Store each entry under its own key in the archive.
    np.savez(net_filename, **quantizeNet)
# ---- Script entry: quantize a pruned LeNet-5 on MNIST, then alternate
# ---- codebook fine-tuning with ordinary training.
caffe.set_mode_gpu()
caffe.set_device(0)
caffe_root = '../../'
model_dir = caffe_root + 'models/mnist/'
deploy = model_dir + 'deploy.prototxt'
solver_file = model_dir + 'solver.prototxt'
# model_name = 'LeNet5_Mnist_shapshot_iter_10000'
model_name = 'LeNet5_Mnist_shapshot_iter_10000_pruned'
caffemodel = model_dir + model_name + '.caffemodel'
dir_t = '/weight_quantize/'
# Codebook-update hyperparameters (see update_codebook_net).
args = dict(lr=0.01, decay_rate=0.0009, momentum=0.9, update='adadelta', normalize_flag=False)
start_time = time.time()
solver = caffe.SGDSolver(solver_file)
solver.net.copy_from(caffemodel)
# Layers whose weights are quantized.
total_layers = ['conv1', 'conv2', 'ip1', 'ip2']
num_c = 2 ** 8  # quantization level: 8-bit codes
codebook = kmeans_net(solver.test_nets[0], total_layers, num_c)
codeDict, maskCode = quantize_net_with_dict(solver.test_nets[0], total_layers, codebook)
quantize_net_caffemodel = model_dir + model_name + '_quantize.caffemodel'
solver.test_nets[0].save(quantize_net_caffemodel)
quantize_net_npz = model_dir + model_name + '_quantize_net'
save_pruned_quantize_net(codebook, maskCode, quantize_net_npz, total_layers)
# Training loop: each cycle of (co_iters + ac_iters) steps starts by
# re-clustering the retrained weights and measuring test accuracy; the first
# co_iters steps of a cycle also update the codebook from the gradients.
accuracys = []
co_iters = 40
ac_iters = 10
for i in range(2500):
    if i % (co_iters + ac_iters) == 0 and i > 0:
        # Re-quantize from the retrained weights.
        codebook = kmeans_net(solver.net, total_layers, num_c)
        codeDict, maskCode = quantize_net_with_dict(solver.net, total_layers, codebook)
        solver.net.save(quantize_net_caffemodel)
        solver.test_nets[0].copy_from(quantize_net_caffemodel)
        _, accu = test_quantize_accu(solver.test_nets[0])
        accuracys.append(accu)
    solver.step(1)
    if i % (co_iters + ac_iters) < co_iters:
        # Codebook-update phase.
        update_codebook_net(solver.net, codebook, codeDict, maskCode, args=args, update_layers=total_layers)
    print("Iter:%d, Time cost:%f" % (i, time.time() - start_time))
plt.plot(accuracys, 'r.-')
plt.show()