We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 4224771 commit 200eaf7 (Copy full SHA for 200eaf7)
1 file changed
razzl/0008/0008.py
@@ -0,0 +1,19 @@
1
+from __future__ import division#division
2
+import re
3
+import urllib2
4
+
5
+url = 'http://world.cankaoxiaoxi.com/2015/0404/730644.shtml'
6
+html = urllib2.urlopen(url).read()
7
+html = re.sub(r'<script[^>]*>([\s\S])*?</script[^>]*>','',html)#delete the script
8
+html = re.sub(r'<style[^>]*>([\s\S])*?</style[^>]*>','',html)#delete the style
9
+html = re.split("[\r\n]+",html)#split
10
+for line in html:
11
+ if line.strip()=='':
12
+ continue
13
+ line_sub = re.sub(r'<[^>]*>','',line)#record the words in a line
14
+ if len(line_sub)/len(line) >= 0.5:#compare the text of the density
15
+ if(line_sub.strip()!=''):
16
+ print line_sub.strip()
17
18
19
0 commit comments