Skip to content

Commit 4cce69f

Browse files
committed
更新58信息提取
1 parent cf60b61 commit 4cce69f

1 file changed

Lines changed: 57 additions & 12 deletions

File tree

fuck/58.py

Lines changed: 57 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,22 @@
66
import re
77
import json
88

9-
# 入口页的url
10-
url = 'http://www.58.com/changecity.aspx'
119

12-
# 伪装成浏览器
13-
header = {
14-
'Host': 'www.58.com',
15-
'Referer': 'http://sh.58.com/',
16-
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
17-
}
18-
19-
s = requests.session()
10+
# Browser-style User-Agent string; the request helpers in this file send it
# as the 'User-Agent' header so requests look like they come from Chrome.
UserAgent = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
2011

2112

2213
def get_city_list():
2314
"""
2415
获取城市列表
2516
"""
26-
response = s.get(url, headers=header)
17+
# 入口页的url
18+
url = 'http://www.58.com/changecity.aspx'
19+
header = {
20+
'Host': 'www.58.com',
21+
'Referer': 'http://sh.58.com/',
22+
'User-Agent': UserAgent
23+
}
24+
response = requests.get(url, headers=header)
2725
html = response.text
2826
rule = '<a href="http://.*?.58.com/" onclick="co\(\'(.*?)\'\)">(.*?)</a>'
2927
city_list = re.compile(rule, re.S).findall(html)
@@ -33,5 +31,52 @@ def get_city_list():
3331
print json.dumps(city, indent=4).decode('raw_unicode_escape')
3432

3533

34+
def get_contacts(ajax_param=None, timeout=10):
    """
    Fetch contact information from the sh.58.com list AJAX endpoint.

    The endpoint is POSTed to with an ``ajax_param`` form field; the JSON
    reply is pretty-printed to stdout and also returned to the caller.

    :param ajax_param: value of the ``ajax_param`` form field. When omitted,
        the sample contact query that used to be hard-coded here is sent.
    :param timeout: seconds to wait for the server before giving up
        (``requests`` waits indefinitely when no timeout is passed).
    :return: the decoded JSON response body.
    :raises requests.RequestException: on connection failure, timeout or a
        non-2xx HTTP status.
    :raises ValueError: if the response body is not valid JSON.
    """
    url = 'http://sh.58.com/hyjk/listAjaxApi/'
    header = {
        'Host': 'sh.58.com',
        'Referer': 'http://sh.58.com/',
        'User-Agent': UserAgent,
        # Marks the request as an AJAX (XMLHttpRequest) call.
        'X-Requested-With': 'XMLHttpRequest'
    }
    if ajax_param is None:
        # Default sample query: prefix + record identifier, concatenated
        # exactly as in the original implementation.
        ajax_param = (
            's_contact_baojie_196139473193474552186077834_'
            '25953277422517_38982245142801_0_adsumplayinfo_8DAA63759947EF47858F8EA3AD3D3F1D'
        )
    form_data = {
        'ajax_param': ajax_param,
        'lmcate': ''
    }
    response = requests.post(url, data=form_data, headers=header, timeout=timeout)
    # Surface HTTP errors directly instead of a confusing JSON decode failure.
    response.raise_for_status()
    result = response.json()
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(json.dumps(result, indent=4, ensure_ascii=False))
    return result
55+
56+
57+
def get_promotion_info(ajax_param=None, timeout=10):
    """
    Fetch member promotion information from the sh.58.com list AJAX endpoint.

    The endpoint is POSTed to with an ``ajax_param`` form field holding a
    JSON-encoded query; the JSON reply is pretty-printed to stdout and also
    returned to the caller.

    :param ajax_param: value of the ``ajax_param`` form field (a JSON string).
        When omitted, the sample query that used to be hard-coded here is sent.
    :param timeout: seconds to wait for the server before giving up
        (``requests`` waits indefinitely when no timeout is passed).
    :return: the decoded JSON response body.
    :raises requests.RequestException: on connection failure, timeout or a
        non-2xx HTTP status.
    :raises ValueError: if the response body is not valid JSON.
    """
    url = 'http://sh.58.com/hyjk/listAjaxApi/'
    header = {
        'Host': 'sh.58.com',
        'Referer': 'http://sh.58.com/',
        'User-Agent': UserAgent,
        # Marks the request as an AJAX (XMLHttpRequest) call.
        'X-Requested-With': 'XMLHttpRequest'
    }
    if ajax_param is None:
        # Default sample query. Adjacent string literals are concatenated by
        # the compiler, so the value is byte-identical to the original single
        # literal; it is only split up (one "dataParam" entry per line) to
        # keep it readable.
        ajax_param = (
            '{"platform":"pc","infoMethod":["renzheng","wltAge"],"dataParam":"'
            '27635365552076_42349714013201_0_adinfo,'
            '23978226171963_30110967056649_0_promationinfo,'
            '27228545116992_7715319655942_0_promationinfo,'
            '24267133521976_31011764_0_promationinfo,'
            '26341531878841_39825442758928_0_promationinfo,'
            '27537787529538_42349714013201_0_promationinfo,'
            '27369765592510_31131127077388_0_promationinfo,'
            '26203081582670_39732159639312_0_promationinfo,'
            '26852770947242_36265725460496_0_promationinfo,'
            '27227627875130_36460206072079_0_promationinfo,'
            '26919564208079_34723293059851_0_promationinfo,'
            '27297229859020_41908793267472_0_promationinfo,'
            '25970724472781_39209928147477_0_promationinfo,'
            '9709048675466_2881415678214_0_promationinfo,'
            '25526822994222_28276516466439_0_promationinfo,'
            '23733432686387_34539145627401_0_promationinfo,'
            '23746434952376_34806212995846_0_promationinfo,'
            '26428537311295_40089453348885_0_promationinfo,'
            '21175695050380_28305155861767_0_promationinfo,'
            '25897156976720_38365916388886_0_promationinfo,'
            '26760665594574_40165314644754_0_promationinfo,'
            '26671643928779_31928151670537_0_promationinfo,'
            '25743851768512_38681202520851_0_promationinfo,'
            '27374487786958_42065473327117_0_promationinfo,'
            '27094804372404_23677654908934_0_promationinfo,'
            '26576085167292_40404568069136_0_promationinfo,'
            '27646817870019_958976883975_0_promationinfo,'
            '26499543940540_40264006853649_0_promationinfo,'
            '18697452964869_24568846015751_0_promationinfo,'
            '26240781793081_39741342008592_0_promationinfo,'
            '27235189567049_41775030971412_0_promationinfo,'
            '25847911701436_28254579084295_0_promationinfo,'
            '26742286458571_40728598353936_0_promationinfo,'
            '27518150853831_42307833403927_0_promationinfo,'
            '19997647110789_27265893924870_0_promationinfo,'
            '22826141761824_17067318798087_0_promationinfo'
            '","dispCateId":168,"dispCateName":"baojie","pageIndex":8,"paramMap":null}'
        )
    form_data = {
        'ajax_param': ajax_param,
        'lmcate': ''
    }
    response = requests.post(url, data=form_data, headers=header, timeout=timeout)
    # Surface HTTP errors directly instead of a confusing JSON decode failure.
    response.raise_for_status()
    result = response.json()
    # Single-argument print(...) is valid under both Python 2 and Python 3.
    print(json.dumps(result, indent=4, ensure_ascii=False))
    return result
77+
78+
3679
if __name__ == '__main__':
    # Script entry point: run the two AJAX queries.
    # get_city_list() is deliberately not part of the default run.
    get_contacts()
    get_promotion_info()

0 commit comments

Comments
 (0)