File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -34,4 +34,4 @@ def get_city_list():
3434
3535
3636if __name__ == '__main__' :
37- get_city_list ()
37+ get_city_list ()
Original file line number Diff line number Diff line change 1+ #!/usr/bin/env python
2+ # encoding: utf-8
3+
4+ """
5+ @author: zhanghe
6+ @software: PyCharm
7+ @file: jobui.py
8+ @time: 16-5-20 下午4:38
9+ """
10+
11+
12+ import requests
13+ import re
14+ import json
15+
16+
# Request headers that make the scraper present itself as a desktop browser.
header = {
    'Host': 'www.jobui.com',
    'Referer': 'http://www.jobui.com',
    # Adjacent literals are concatenated; the value is one unbroken string.
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
    ),
}
23+
# Module-level HTTP session shared by every request made in this file.
# ``requests.Session`` is the supported constructor; the lowercase
# ``requests.session()`` factory is a deprecated backwards-compatibility alias.
s = requests.Session()
25+
26+
def get_city_list():
    """
    Fetch the jobui.com city-selection page and print the city names.

    The page is requested through the module-level session ``s`` using the
    browser-like ``header``. Every ``data_city`` attribute value found on a
    ``changeCity`` link is collected, duplicates are dropped while keeping
    the order of first appearance, and the result is printed as an indented
    JSON array with non-ASCII characters left readable.

    Returns:
        None. The list is written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # URL of the entry page.
    url = 'http://www.jobui.com/changecity/'
    # A timeout keeps the script from hanging forever on a stalled server.
    response = s.get(url, headers=header, timeout=10)
    html = response.text

    # Raw string so the regex escapes (``\(`` / ``\)``) are not interpreted
    # as (invalid) Python string escapes.
    rule = r'<a onclick="changeCity\(this\);" href=".*?" data_city="(.*?)" data_url=".*?">.*?</a>'
    matches = re.findall(rule, html, re.S)

    # Order-preserving de-duplication with O(1) membership checks.
    seen = set()
    city_name_list = []
    for name in matches:
        if name not in seen:
            seen.add(name)
            city_name_list.append(name)

    # ensure_ascii=False emits the original characters directly instead of
    # \uXXXX escapes, so no post-hoc unicode-escape decoding is needed.
    print(json.dumps(city_name_list, indent=4, ensure_ascii=False))
42+
43+
def get_industry_list():
    """
    Fetch the jobui.com company index page and print the industry names.

    The page is requested through the module-level session ``s`` using the
    browser-like ``header``. The link text of every anchor whose ``href``
    starts with ``/cmp?industry=`` is collected, duplicates are dropped while
    keeping the order of first appearance, and the result is printed as an
    indented JSON array with non-ASCII characters left readable.

    Returns:
        None. The list is written to standard output.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    # URL of the entry page.
    url = 'http://www.jobui.com/cmp'
    # A timeout keeps the script from hanging forever on a stalled server.
    response = s.get(url, headers=header, timeout=10)
    html = response.text

    # Raw string so ``\?`` reaches the regex engine as an escaped literal
    # question mark rather than being an invalid Python string escape.
    rule = r'<a href="/cmp\?industry=.*?" >(.*?)</a>'
    matches = re.findall(rule, html, re.S)

    # Order-preserving de-duplication with O(1) membership checks.
    seen = set()
    industry_name_list = []
    for name in matches:
        if name not in seen:
            seen.add(name)
            industry_name_list.append(name)

    # ensure_ascii=False emits the original characters directly instead of
    # \uXXXX escapes, so no post-hoc unicode-escape decoding is needed.
    print(json.dumps(industry_name_list, indent=4, ensure_ascii=False))
59+
60+
if __name__ == '__main__':
    # When run as a script, execute each scraper in turn: cities first,
    # then industries.
    for scrape in (get_city_list, get_industry_list):
        scrape()
64+
You can’t perform that action at this time.
0 commit comments