|
| 1 | +# encoding: utf-8 |
| 2 | +# file: |
| 3 | +# author: wu ming ming |
| 4 | + |
| 5 | +import os |
| 6 | +import re |
| 7 | +from openpyxl import worksheet, load_workbook |
| 8 | +from openpyxl.utils import cell |
| 9 | +from collections import OrderedDict |
| 10 | +# from ConfigParser import ConfigParser |
| 11 | + |
| 12 | + |
| 13 | +def str2unicode(string): |
| 14 | + if not isinstance(string, unicode): |
| 15 | + return string.encode('utf-8') |
| 16 | + else: |
| 17 | + return string |
| 18 | + |
| 19 | + |
| 20 | +def create_pattern_from_sting(string): |
| 21 | + if not isinstance(string, str): |
| 22 | + raise TypeError |
| 23 | + |
| 24 | + tags = re.findall('\W', string) |
| 25 | + pattern = '(.*' |
| 26 | + for tag in tags: |
| 27 | + pattern += tag + '.*' |
| 28 | + pattern += ')' |
| 29 | + return pattern, re.compile(pattern) |
| 30 | + |
| 31 | + |
| 32 | +class EXCEL(object): |
| 33 | + '''excel driver''' |
| 34 | + |
| 35 | + def __init__(self, file_path): |
| 36 | + if not os.path.exists(file_path): |
| 37 | + raise ValueError |
| 38 | + self.file_path = file_path |
| 39 | + try: |
| 40 | + self.workbook = load_workbook(self.file_path, data_only=True) |
| 41 | + except IOError: |
| 42 | + raise IOError |
| 43 | + |
| 44 | + def get_sheets_by_name(self, name_list): |
| 45 | + if not isinstance(name_list, list): |
| 46 | + raise TypeError |
| 47 | + sheets = [] |
| 48 | + for name in name_list: |
| 49 | + for sheet in self.workbook: |
| 50 | + if name in sheet.title: |
| 51 | + sheets.append(sheet) |
| 52 | + return sheets |
| 53 | + |
| 54 | + def coordinate_in_region(self, coordinate, region_start, region_end): |
| 55 | + '''判断单元格是否在区间内''' |
| 56 | + if not isinstance(coordinate, str) or not isinstance(region_start, str) or not isinstance(region_end, str): |
| 57 | + raise TypeError |
| 58 | + try: |
| 59 | + co = cell.coordinate_from_string(coordinate) |
| 60 | + co_re_s = cell.coordinate_from_string(region_start) |
| 61 | + co_re_e = cell.coordinate_from_string(region_end) |
| 62 | + if co[0] in cell.get_column_interval(co_re_s[0], co_re_e) and co[1] in range(co_re_s[1], co_re_e[1]+1): |
| 63 | + return True |
| 64 | + else: |
| 65 | + return False |
| 66 | + except: |
| 67 | + raise ValueError |
| 68 | + |
| 69 | + def is_merged_cell(self, sheet, coordinate): |
| 70 | + '''单元格是否在合并单元格内''' |
| 71 | + if not isinstance(sheet, worksheet) or not isinstance(coordinate, str): |
| 72 | + raise TypeError |
| 73 | + return coordinate in sheet.merged_cells |
| 74 | + |
| 75 | + def get_merged_region(self, sheet, coordinate): |
| 76 | + '''返回单元格所在的合并单元格''' |
| 77 | + if not isinstance(sheet, worksheet): |
| 78 | + raise TypeError |
| 79 | + for region in sheet.merged_cell_ranges: |
| 80 | + re_co = region.split(':') |
| 81 | + if self.coordinate_in_region(coordinate, re_co[0], re_co[1]): |
| 82 | + return region |
| 83 | + return None |
| 84 | + |
| 85 | + def get_merged_value(self, sheet, region): |
| 86 | + '''返回合并单元格的值''' |
| 87 | + if not isinstance(sheet, worksheet): |
| 88 | + raise TypeError |
| 89 | + try: |
| 90 | + re_co = region.split(':') |
| 91 | + region_x = self.get_merged_region(sheet, re_co[0]) |
| 92 | + region_y = self.get_merged_region(sheet, re_co[1]) |
| 93 | + if region_x == region_y: |
| 94 | + return sheet[region_x.split(':')[0]].value |
| 95 | + else: |
| 96 | + return None |
| 97 | + except: |
| 98 | + raise |
| 99 | + |
| 100 | + def get_coordinates_value(self, sheet, cfg_list, start_row=None, end_row=None, offset=None): |
| 101 | + '''以字典方式返回多区域单元格的值 |
| 102 | + :param |
| 103 | + sheet: worksheet |
| 104 | + args[0]: |
| 105 | + [ |
| 106 | + { |
| 107 | + 'column': # 'A' |
| 108 | + 'pattern': # '(\w\d{2}/\d{2}(?#D12/34))' |
| 109 | + 'key_name': # 'name' |
| 110 | + 'start_row': # 10 |
| 111 | + 'end_row': # 20 |
| 112 | + 'sub_keys':[ |
| 113 | + { |
| 114 | + 'column': # 'B' |
| 115 | + 'pattern': # '(\w\d{2}/\d{2}(?#D12/34))' |
| 116 | + 'key_name': # 'server_typ' |
| 117 | + 'start_row': # 10 |
| 118 | + 'end_row': # 20 |
| 119 | + 'sub_keys':[ |
| 120 | + { |
| 121 | + ... |
| 122 | + } |
| 123 | + } |
| 124 | + ... |
| 125 | + ] |
| 126 | + }, |
| 127 | + ... |
| 128 | + ] |
| 129 | + args[1]: start_row |
| 130 | + args[2]: end_row |
| 131 | + args[3]: offset # 外层调用不传offset |
| 132 | +
|
| 133 | + :return |
| 134 | + { |
| 135 | + 'name':{ |
| 136 | + 'server_type':{ |
| 137 | + ... # value or dict |
| 138 | + } |
| 139 | + '...' |
| 140 | + } |
| 141 | + ... |
| 142 | + } |
| 143 | + ''' |
| 144 | + |
| 145 | + if not isinstance(sheet, worksheet): |
| 146 | + raise TypeError |
| 147 | + if not isinstance(cfg_list, list): |
| 148 | + raise TypeError |
| 149 | + if offset and not isinstance(offset, int): |
| 150 | + raise TypeError |
| 151 | + if start_row and not isinstance(start_row, int): |
| 152 | + raise TypeError |
| 153 | + if end_row and not isinstance(end_row, int): |
| 154 | + raise TypeError |
| 155 | + |
| 156 | + co_value_dict = OrderedDict() |
| 157 | + |
| 158 | + for cfg in cfg_list: |
| 159 | + if not isinstance(cfg, dict): |
| 160 | + raise TypeError |
| 161 | + |
| 162 | + # 内层参数优先于外层参数, |
| 163 | + if 'start_row' in cfg.keys() and 'end_row' in cfg.keys() and isinstance(cfg['start_row'], int) and isinstance(cfg['end_row']): |
| 164 | + _start_row = cfg['start_row'] |
| 165 | + _end_row = cfg['end_row'] |
| 166 | + elif start_row and end_row: |
| 167 | + _start_row = start_row |
| 168 | + _end_row = end_row |
| 169 | + else: |
| 170 | + _start_row = 1 |
| 171 | + _end_row = sheet.rows |
| 172 | + if offset: |
| 173 | + _start_row = _start_row + offset |
| 174 | + _end_row = start_row + 1 |
| 175 | + |
| 176 | + try: |
| 177 | + for row in range(_start_row, _end_row + 1): |
| 178 | + # 获取单元格值 |
| 179 | + coordinate = cfg['column'] + str(row) |
| 180 | + if self.is_merged_cell(coordinate): |
| 181 | + value = sheet[self.get_merged_region(sheet, coordinate).split(':')[0]] |
| 182 | + if 'keys' in cfg.keys(): |
| 183 | + import warnings |
| 184 | + warnings.warn('{0}{1} is merged cell, should not had keys config ...') |
| 185 | + else: |
| 186 | + value = sheet[coordinate] |
| 187 | + |
| 188 | + # 判断有效性 |
| 189 | + if value: |
| 190 | + # 未匹配正则 |
| 191 | + if 'pattern' in cfg and cfg['pattern'] and re.match(cfg['pattern'], value) is None: |
| 192 | + # 递归调用才传offset,外层调用不传offset |
| 193 | + if offset: |
| 194 | + value = coordinate + '_match_failed' |
| 195 | + else: |
| 196 | + # 首列不匹配跳过 |
| 197 | + continue |
| 198 | + elif offset: |
| 199 | + # 递归调用时空值 |
| 200 | + value = coordinate + '_is_none' |
| 201 | + else: |
| 202 | + # 首列空值跳过 |
| 203 | + continue |
| 204 | + |
| 205 | + # 递归调用查找字典各值 |
| 206 | + if 'keys' in cfg.keys(): |
| 207 | + co_value_dict [value] = OrderedDict() |
| 208 | + sub_value = self.get_coordinates_value(sheet, cfg['keys'], row, row, row - _start_row) |
| 209 | + for s_key, s_value in sub_value.items(): |
| 210 | + co_value_dict[value][s_key] = s_value |
| 211 | + else: |
| 212 | + co_value_dict[cfg['key_name']] = value |
| 213 | + except: |
| 214 | + raise |
| 215 | + |
| 216 | + return co_value_dict |
| 217 | + |
| 218 | + |
| 219 | +if __name__ == '__main__': |
| 220 | + pattern, _ = create_pattern_from_sting('asd/sfw/fea/wer-123') |
| 221 | + print pattern |
| 222 | + print re.match(pattern, 'asd/sfw/fea/wer-123').string |
| 223 | + |
| 224 | + pass |
0 commit comments