Skip to content

Commit c690cb4

Browse files
committed
批量清理 html 标签
1 parent 2d62a3b commit c690cb4

1 file changed

Lines changed: 64 additions & 0 deletions

File tree

test/test_replace_html.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#!/usr/bin/env python
2+
# encoding: utf-8
3+
4+
"""
5+
@author: zhanghe
6+
@software: PyCharm
7+
@file: test_replace_html.py
8+
@time: 16-1-20 下午3:08
9+
"""
10+
11+
import re
12+
13+
14+
def replace_html(input_html, reg_expression=r'', replace_text=''):
    """
    Run a case-insensitive regular-expression substitution over a string.

    :param input_html: text to process
    :param reg_expression: regular-expression pattern, applied with re.I
    :param replace_text: replacement used for every match
    :return: input_html with all matches of the pattern replaced
    """
    # Pattern tip: put ? after .* to make the match non-greedy.
    return re.sub(reg_expression, replace_text, input_html, flags=re.I)
25+
26+
27+
def replace_file_html(content, reg_rule=None):
    """
    Apply a sequence of regex replacement rules to HTML text, in order.

    :param content: text to process
    :param reg_rule: iterable of (pattern, replacement) pairs; a falsy value
        (None, empty list) means "no rules" and content is returned unchanged.
        Example:
        reg_rule = [
            (r'<a href="http://(.*?).shtml"', '<a href="#"'),
            (r' onClick="analytical((.*?))"', '')
        ]
    :return: content after every rule has been applied
    """
    # Each rule operates on the output of the previous one.
    for rule in (reg_rule or ()):
        content = replace_html(content, rule[0], rule[1])
    return content
44+
45+
46+
def test_replace_html():
    """
    Demonstrate batch cleaning of HTML tags on a sample job-description snippet.

    Removes div, span, h1-h6 and strong tags, rewrites every br tag (with or
    without attributes) to a plain <br/>, collapses blank lines, and prints
    the cleaned text. Returns nothing.
    """
    html = '''<div style=\"min-height: 16px; \"><h3 style=\"margin: 0px; padding: 0px; \"><strong><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">岗位职责:</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">1.负责互联网产品的视觉交互界面设计及图形设计;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">2.负责为日常运营活动、功能改进及维护提供美术支持;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">3.负责新产品与新功能提供创意策划并提供用户界面的设计方案;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">4.参与产品设计优化工作,提出视觉设计优化方案;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">5.参与用户体验计划,通过研究用户心理、分析数据,改进视觉设计;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: 
rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">任职要求:</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">1.爱生活爱分享,爱设计爱前端,正确的审美和深刻的用户体验认知;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">2.有扎实的美术功底、良好色彩审美观及优秀的创意设计能力;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">3.有移动平台/网站相关的界面设计经验;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">3.熟练使用图像处理或网页制作相关软件;</span><br style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; white-space: normal; widows: auto; background-color: rgb(255, 255, 255);\"/><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">4.能独立完成项目;</span><span style=\"font-family:宋体\"><span style=\"font-size: 14px;\"></span></span></strong></h3><h3 style=\"font-family: 宋体; font-size: 12px; margin: 0px; padding: 0px; \"><br/></h3><p><strong><span style=\"font-family: 微软雅黑; font-size: 14px; line-height: 21px; widows: auto; background-color: rgb(255, 255, 255);\">&nbsp;(请附带近期设计作品)</span></strong></p></div>'''
    # Rule notes:
    # * [^>]* is used for the attribute part instead of .*? because '.' does
    #   not match a newline; the sample above has a line break inside one
    #   <br style=...> tag, which .*? would fail to span.
    # * \b after the tag name stops a rule from matching longer tag names.
    # * Headings are restricted to h1-h6 so tags such as <hr>, <head> or
    #   <html> are not stripped by the heading rule.
    reg_rule_html = [
        (r'<[/]*div\b[^>]*>', ''),
        (r'<[/]*span\b[^>]*>', ''),
        (r'<[/]*h[1-6]\b[^>]*>', ''),
        (r'<[/]*strong\b[^>]*>', ''),
        (r'<[/]*br\b[^>]*>', '<br/>'),
        # Greedy match: collapse runs of blank / whitespace-only lines into a
        # single newline. \s already covers \r; a '|' inside a character
        # class would be a literal pipe, so it is not used here.
        (r'\n\s*\n', '\n'),
    ]
    html = replace_file_html(html, reg_rule_html)
    # Parenthesised single-argument form works on both Python 2 and Python 3.
    print(html)
61+
62+
63+
# Run the tag-cleaning demo only when this file is executed as a script.
if __name__ == "__main__":
    test_replace_html()

0 commit comments

Comments
 (0)