Skip to content

Commit cccd305

Browse files
committed
excel 读驱动
1 parent b2829e1 commit cccd305

2 files changed

Lines changed: 311 additions & 84 deletions

File tree

excel_util.py

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
# encoding: utf-8
2+
# file:
3+
# author: wu ming ming
4+
5+
import os
6+
import re
7+
from openpyxl import worksheet, load_workbook
8+
from openpyxl.utils import cell
9+
from collections import OrderedDict
10+
# from ConfigParser import ConfigParser
11+
12+
13+
def str2unicode(string):
14+
if not isinstance(string, unicode):
15+
return string.encode('utf-8')
16+
else:
17+
return string
18+
19+
20+
def create_pattern_from_sting(string):
21+
if not isinstance(string, str):
22+
raise TypeError
23+
24+
tags = re.findall('\W', string)
25+
pattern = '(.*'
26+
for tag in tags:
27+
pattern += tag + '.*'
28+
pattern += ')'
29+
return pattern, re.compile(pattern)
30+
31+
32+
class EXCEL(object):
33+
'''excel driver'''
34+
35+
def __init__(self, file_path):
36+
if not os.path.exists(file_path):
37+
raise ValueError
38+
self.file_path = file_path
39+
try:
40+
self.workbook = load_workbook(self.file_path, data_only=True)
41+
except IOError:
42+
raise IOError
43+
44+
def get_sheets_by_name(self, name_list):
45+
if not isinstance(name_list, list):
46+
raise TypeError
47+
sheets = []
48+
for name in name_list:
49+
for sheet in self.workbook:
50+
if name in sheet.title:
51+
sheets.append(sheet)
52+
return sheets
53+
54+
def coordinate_in_region(self, coordinate, region_start, region_end):
55+
'''判断单元格是否在区间内'''
56+
if not isinstance(coordinate, str) or not isinstance(region_start, str) or not isinstance(region_end, str):
57+
raise TypeError
58+
try:
59+
co = cell.coordinate_from_string(coordinate)
60+
co_re_s = cell.coordinate_from_string(region_start)
61+
co_re_e = cell.coordinate_from_string(region_end)
62+
if co[0] in cell.get_column_interval(co_re_s[0], co_re_e) and co[1] in range(co_re_s[1], co_re_e[1]+1):
63+
return True
64+
else:
65+
return False
66+
except:
67+
raise ValueError
68+
69+
def is_merged_cell(self, sheet, coordinate):
70+
'''单元格是否在合并单元格内'''
71+
if not isinstance(sheet, worksheet) or not isinstance(coordinate, str):
72+
raise TypeError
73+
return coordinate in sheet.merged_cells
74+
75+
def get_merged_region(self, sheet, coordinate):
76+
'''返回单元格所在的合并单元格'''
77+
if not isinstance(sheet, worksheet):
78+
raise TypeError
79+
for region in sheet.merged_cell_ranges:
80+
re_co = region.split(':')
81+
if self.coordinate_in_region(coordinate, re_co[0], re_co[1]):
82+
return region
83+
return None
84+
85+
def get_merged_value(self, sheet, region):
86+
'''返回合并单元格的值'''
87+
if not isinstance(sheet, worksheet):
88+
raise TypeError
89+
try:
90+
re_co = region.split(':')
91+
region_x = self.get_merged_region(sheet, re_co[0])
92+
region_y = self.get_merged_region(sheet, re_co[1])
93+
if region_x == region_y:
94+
return sheet[region_x.split(':')[0]].value
95+
else:
96+
return None
97+
except:
98+
raise
99+
100+
def get_coordinates_value(self, sheet, cfg_list, start_row=None, end_row=None, offset=None):
101+
'''以字典方式返回多区域单元格的值
102+
:param
103+
sheet: worksheet
104+
args[0]:
105+
[
106+
{
107+
'column': # 'A'
108+
'pattern': # '(\w\d{2}/\d{2}(?#D12/34))'
109+
'key_name': # 'name'
110+
'start_row': # 10
111+
'end_row': # 20
112+
'sub_keys':[
113+
{
114+
'column': # 'B'
115+
'pattern': # '(\w\d{2}/\d{2}(?#D12/34))'
116+
'key_name': # 'server_typ'
117+
'start_row': # 10
118+
'end_row': # 20
119+
'sub_keys':[
120+
{
121+
...
122+
}
123+
}
124+
...
125+
]
126+
},
127+
...
128+
]
129+
args[1]: start_row
130+
args[2]: end_row
131+
args[3]: offset # 外层调用不传offset
132+
133+
:return
134+
{
135+
'name':{
136+
'server_type':{
137+
... # value or dict
138+
}
139+
'...'
140+
}
141+
...
142+
}
143+
'''
144+
145+
if not isinstance(sheet, worksheet):
146+
raise TypeError
147+
if not isinstance(cfg_list, list):
148+
raise TypeError
149+
if offset and not isinstance(offset, int):
150+
raise TypeError
151+
if start_row and not isinstance(start_row, int):
152+
raise TypeError
153+
if end_row and not isinstance(end_row, int):
154+
raise TypeError
155+
156+
co_value_dict = OrderedDict()
157+
158+
for cfg in cfg_list:
159+
if not isinstance(cfg, dict):
160+
raise TypeError
161+
162+
# 内层参数优先于外层参数,
163+
if 'start_row' in cfg.keys() and 'end_row' in cfg.keys() and isinstance(cfg['start_row'], int) and isinstance(cfg['end_row']):
164+
_start_row = cfg['start_row']
165+
_end_row = cfg['end_row']
166+
elif start_row and end_row:
167+
_start_row = start_row
168+
_end_row = end_row
169+
else:
170+
_start_row = 1
171+
_end_row = sheet.rows
172+
if offset:
173+
_start_row = _start_row + offset
174+
_end_row = start_row + 1
175+
176+
try:
177+
for row in range(_start_row, _end_row + 1):
178+
# 获取单元格值
179+
coordinate = cfg['column'] + str(row)
180+
if self.is_merged_cell(coordinate):
181+
value = sheet[self.get_merged_region(sheet, coordinate).split(':')[0]]
182+
if 'keys' in cfg.keys():
183+
import warnings
184+
warnings.warn('{0}{1} is merged cell, should not had keys config ...')
185+
else:
186+
value = sheet[coordinate]
187+
188+
# 判断有效性
189+
if value:
190+
# 未匹配正则
191+
if 'pattern' in cfg and cfg['pattern'] and re.match(cfg['pattern'], value) is None:
192+
# 递归调用才传offset,外层调用不传offset
193+
if offset:
194+
value = coordinate + '_match_failed'
195+
else:
196+
# 首列不匹配跳过
197+
continue
198+
elif offset:
199+
# 递归调用时空值
200+
value = coordinate + '_is_none'
201+
else:
202+
# 首列空值跳过
203+
continue
204+
205+
# 递归调用查找字典各值
206+
if 'keys' in cfg.keys():
207+
co_value_dict [value] = OrderedDict()
208+
sub_value = self.get_coordinates_value(sheet, cfg['keys'], row, row, row - _start_row)
209+
for s_key, s_value in sub_value.items():
210+
co_value_dict[value][s_key] = s_value
211+
else:
212+
co_value_dict[cfg['key_name']] = value
213+
except:
214+
raise
215+
216+
return co_value_dict
217+
218+
219+
if __name__ == '__main__':
220+
pattern, _ = create_pattern_from_sting('asd/sfw/fea/wer-123')
221+
print pattern
222+
print re.match(pattern, 'asd/sfw/fea/wer-123').string
223+
224+
pass

0 commit comments

Comments
 (0)