Skip to content

Commit aa55fe2

Browse files
committed
the 9th chapter
1 parent 73c8b4c commit aa55fe2

31 files changed

Lines changed: 3490 additions & 0 deletions

chapter9/.ipynb_checkpoints/1_2Alldata-checkpoint.ipynb

Lines changed: 1055 additions & 0 deletions
Large diffs are not rendered by default.

chapter9/.ipynb_checkpoints/2_1buildModel-checkpoint.ipynb

Lines changed: 282 additions & 0 deletions
Large diffs are not rendered by default.

chapter9/.ipynb_checkpoints/Untitled-checkpoint.ipynb

Lines changed: 287 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"cells": [],
3+
"metadata": {},
4+
"nbformat": 4,
5+
"nbformat_minor": 2
6+
}

chapter9/1_1dataPreprocessing.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
# -*- coding: utf-8 -*-
2+
# 此小节是进行数据预处理
3+
# step1 图像切割
4+
# step2 特征提取:常用直方图法、颜色矩,此代码用的是颜色矩方法
5+
from __future__ import division
6+
from PIL import Image
7+
import cv2
8+
import numpy as np
9+
import os
10+
from pandas import DataFrame
11+
import pandas as pd
12+
13+
# 获取当前工作目录及子目录下所有图片文件的绝对路径,包含其所有子文件夹中的图片
14+
def getimgdir(imgfilename):
    """Collect the paths of all image files under a directory tree.

    Args:
        imgfilename: Directory to search. When it is empty or None, the
            current working directory is searched instead.

    Returns:
        A list with the path of every file below the search root (all
        sub-folders included) whose extension is .png, .jpg or .bmp; the
        extension is compared case-insensitively.
    """
    imgTypes = (".png", ".jpg", ".bmp")
    presentfiles = imgfilename if imgfilename else os.getcwd()

    imgdirs = []
    # Walk the requested directory itself. Walking "." would enumerate the
    # working directory while prefixing the results with another path.
    for root, _dirs, files in os.walk(presentfiles):
        for afile in files:
            # splitext() returns an empty extension for names without a dot,
            # so extensionless files are skipped instead of raising.
            if os.path.splitext(afile)[1].lower() in imgTypes:
                imgdirs.append(os.path.join(root, afile))
    return imgdirs
33+
34+
# Collect the paths of the image files located directly inside the given directory (sub-folders are not searched)
35+
def getimgdir_designed(imgfilename):
    """Collect the image files located directly inside one directory.

    Args:
        imgfilename: Directory to scan; its sub-folders are not searched.

    Returns:
        A list with the path of every .png/.jpg/.bmp file found in the
        directory (extension compared case-insensitively), or None, after
        printing a message, when the given path does not exist.
    """
    if not os.path.exists(imgfilename):
        # Single-argument print(...) parses identically as a Python 2 print
        # statement and as a Python 3 function call.
        print('你设定的指定文件夹不存在!')
        return None

    imgTypes = (".png", ".jpg", ".bmp")
    imgdirs = []
    # Only the first os.walk() entry is consumed: it describes the top-level
    # directory, which keeps the search non-recursive.
    for _root, _dirs, files in os.walk(imgfilename):
        for afile in files:
            # splitext() yields '' for names without a dot, so such files
            # are skipped instead of raising.
            if os.path.splitext(afile)[1].lower() in imgTypes:
                imgdirs.append(os.path.join(imgfilename, afile))
        break
    return imgdirs
51+
52+
def splitimage(src, halfw, halfh, dstpath):
    """Crop the centred (2*halfw) x (2*halfh) pixel region out of an image.

    The caller must make sure the centre of the picture holds the useful
    content; if it does not, the picture needs to be pre-processed first.

    Args:
        src: Path of the image file to crop.
        halfw: Half of the width of the cropped image, in pixels.
        halfh: Half of the height of the cropped image, in pixels.
        dstpath: Directory the cropped image is saved into; '' means the
            directory that contains `src`.

    Returns:
        The path of the saved file: the source file name with '_cut'
        inserted before its extension, inside `dstpath`.
    """
    img = Image.open(src)
    w, h = img.size

    src_dir, src_name = os.path.split(src)
    if dstpath == '':
        dstpath = src_dir
    # splitext() keeps inner dots in the base name ("a.b.jpg" -> "a.b").
    basename, ext = os.path.splitext(src_name)

    # A PIL crop box is (left, upper, right, lower): the horizontal bounds
    # derive from the width, the vertical bounds from the height.
    cx, cy = w // 2, h // 2
    box = (cx - halfw, cy - halfh, cx + halfw, cy + halfh)
    pic_cut_name = os.path.join(dstpath, basename + '_cut' + ext)
    img.crop(box).save(pic_cut_name)
    return pic_cut_name
71+
72+
# 颜色矩方式进行特征提取
73+
def color_moments(filename):
    """Extract nine colour-moment features from an image file.

    The image is read with cv2.imread, converted from BGR to HSV and split
    into its three channels.

    Args:
        filename: Path of the image file.

    Returns:
        A list of nine values: the mean of each channel, then the standard
        deviation of each channel, then the signed cube root of each
        channel's third central moment (channel order H, S, V within each
        group). None when cv2.imread returns None.
    """
    img = cv2.imread(filename)
    if img is None:
        return None

    channels = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))

    # First-order moments: the plain mean of every channel.
    means = [np.mean(channel) for channel in channels]
    # Second-order moments: the standard deviation of every channel.
    stds = [np.std(channel) for channel in channels]

    # Third-order moments: cube root of the mean cubed deviation. The root
    # is taken on the magnitude and the sign is restored afterwards, because
    # a fractional power of a negative float is not a real number.
    thirds = []
    for channel in channels:
        cubed = np.mean((channel - channel.mean())**3)
        sign = -1 if cubed < 0 else 1
        thirds.append(abs(cubed)**(1./3) * sign)

    return means + stds + thirds
105+
106+
# 输出图片输出目录
107+
def saveimg(dstpath):
    """Resolve the output directory, creating it when necessary.

    Args:
        dstpath: Requested output directory; '' selects the current
            working directory.

    Returns:
        The current working directory when `dstpath` is '', otherwise
        `dstpath` itself (the directory is created first if the path does
        not exist yet).
    """
    # An empty answer means "no explicit output directory".
    if dstpath == '':
        return os.getcwd()
    if not os.path.exists(dstpath):
        os.makedirs(dstpath)
    return dstpath
114+
115+
if __name__ == '__main__':
    # Directory that holds the images of one class; every class is processed
    # separately to produce its own feature table.
    doc = r'D:\PycharmProjects\sf_dataAnalysis\waterimg\images\1'

    # Option 1: getimgdir(doc) also collects the images of all sub-folders.
    # Option 2 (used here): only the images directly inside `doc`.
    waitcut = getimgdir_designed(doc)
    if waitcut is None:
        # getimgdir_designed already reported the missing directory; stop
        # here instead of prompting for values that can never be used.
        raise SystemExit(1)

    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Ask for the output directory (created when it does not exist), e.g.
    # D:\PycharmProjects\readwriteFiles\img_cut_pix\pic_cut
    dstpath = saveimg(read_line('请输入图片输出目录:'))

    # Keep asking until both half sizes are positive integers. The answers
    # are parsed with int() rather than evaluated as Python expressions.
    while True:
        try:
            halfh = int(read_line('请输入切割后图片高的一半(数值):'))  # e.g. 50
            halfw = int(read_line('请输入切割后图片宽的一半(数值):'))  # e.g. 50
            if halfh <= 0 or halfw <= 0:
                raise ValueError('half sizes must be positive integers')
        except ValueError as err:
            print(err)
        else:
            break

    print('正在批处理切割图片...')
    resultlist = []  # one row of colour moments per processed image
    for i in waitcut:
        # splitimage expects the half width before the half height.
        filename = splitimage(i, halfw, halfh, dstpath)
        impix = color_moments(filename)
        # color_moments returns None for a file it cannot read; such a row
        # would break the table construction below.
        if impix is not None:
            resultlist.append(impix)
    print('图片切割完成!')

    # NOTE(review): the labels say R/G/B, but color_moments computes the
    # moments on the H/S/V channels. The labels are left untouched so the
    # written CSV keeps its current header.
    columnsname = ['R通道一阶矩', 'G通道一阶矩', 'B通道一阶矩',
                   'R通道二阶矩', 'G通道二阶矩', 'B通道二阶矩',
                   'R通道三阶矩', 'G通道三阶矩', 'B通道三阶矩']
    df = DataFrame(resultlist, columns=columnsname)
    df.insert(0, 'typenum', 1)  # class label of this batch
    df.to_csv('type1.csv', encoding='gbk', index=False)

    print(df)

chapter9/1_2Alldata.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
2+
# coding: utf-8
3+
4+
5+
import pandas as pd
6+
from pandas import DataFrame
7+
import numpy as np
8+
9+
10+
# Read the colour-moment table of each of the five classes, in class order.
csv_names = ('type1.csv', 'type2.csv', 'type3.csv', 'type4.csv', 'type5.csv')
d1, d2, d3, d4, d5 = [pd.read_csv(csv_name, encoding="gbk")
                      for csv_name in csv_names]

# Stack the five tables under a fresh consecutive index and store the result.
ALLDATA = pd.concat([d1, d2, d3, d4, d5], ignore_index=True)
ALLDATA.to_excel('ALLDATA.xlsx', index=False)
23+

0 commit comments

Comments
 (0)