1+ import requests
2+ from urllib import parse
3+ from bs4 import BeautifulSoup
4+ import re
5+ import json
# HTTP request headers used for the search-page request; the values mimic a
# desktop Chrome browser addressing stock.tuchong.com.
header = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
    'Cookie': 'wluuid=66; ',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
    # 'br' is intentionally NOT advertised: requests only decodes Brotli
    # responses when an optional brotli package is installed, so offering it
    # can yield an undecoded body that the HTML parser cannot read.
    'Accept-encoding': 'gzip, deflate',
    'Accept-language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'connection': 'keep-alive',
    'Host': 'stock.tuchong.com',
    'Upgrade-Insecure-Requests': '1',
}
def mkdir(path):
    """Create directory *path*, including missing parents, if it is absent.

    Surrounding whitespace and any trailing backslashes/spaces are removed
    from *path* before it is used.

    Args:
        path: Directory path to create.

    Returns:
        True if the directory was created by this call, False if something
        already exists at that path.

    Raises:
        OSError: If the directory cannot be created for any reason other
            than already existing (e.g. an empty path or missing permission).
    """
    import os  # local import: the module does not import os at file level

    path = path.strip().rstrip("\\ ")
    try:
        # Create-and-catch instead of exists()-then-create: the check and the
        # creation happen in one step, so a concurrent creator cannot make
        # this call fail between the two.
        os.makedirs(path)
    except FileExistsError:
        return False
    return True
def downloadimage(imageid, imgname):
    """Download the image for *imageid* and save it as ``imgname + '.jpg'``.

    Two host names are tried in order and the first response with HTTP
    status 200 is written to disk. If neither answers with 200, nothing is
    written and a failure message is printed instead.

    Args:
        imageid: Image identifier used to build the download URL.
        imgname: Destination path without extension; '.jpg' is appended.

    Raises:
        requests.RequestException: On connection errors or timeouts.
        OSError: If the destination file cannot be written.
    """
    candidates = (
        'https://weiliicimg9.pstatp.com/weili/ms/' + str(imageid) + '.webp',
        'https://icweiliimg9.pstatp.com/weili/ms/' + str(imageid) + '.webp',
    )
    for url in candidates:
        # A timeout keeps one unresponsive host from hanging the whole run.
        r = requests.get(url, timeout=10)
        print(r.status_code)
        if r.status_code == 200:
            break
    else:
        # Neither host delivered the image: do not save an error page as if
        # it were image data.
        print(imgname + " 下载失败")
        return

    # NOTE(review): the URL requests a .webp resource but the file is saved
    # with a .jpg extension; kept as-is so existing output names do not change.
    with open(imgname + '.jpg', 'wb') as f:
        f.write(r.content)
    print(imgname + " 下载成功")
def _parse_hits(script_text):
    """Return the list assigned to ``window.hits`` in *script_text*, or []."""
    match = re.search(r'window\.hits\s*=\s*(?=\[)', script_text)
    if match is None:
        return []
    try:
        # raw_decode parses exactly one JSON value starting at the opening
        # bracket and ignores whatever follows it, so nested objects and
        # braces inside strings are handled correctly.
        hits, _ = json.JSONDecoder().raw_decode(script_text, match.end())
    except ValueError:
        return []
    return hits if isinstance(hits, list) else []


def _safe_filename(name):
    """Return *name* with path separators and reserved characters replaced."""
    return re.sub(r'[\\/:*?"<>|\r\n\t]', '_', str(name)).strip()


def getText(text):
    """Search stock.tuchong.com for *text* and download every hit's image.

    The search page is fetched, the ``window.hits = [...]`` array is located
    in the page's <script> elements and parsed as JSON, and each hit is saved
    through ``downloadimage`` as ``img/<text>/<title><n>`` (n counts from 1).
    A failure on one hit is printed and does not stop the remaining ones.

    Args:
        text: Search keyword; also used as the sub-directory name under img/.

    Raises:
        requests.RequestException: If the search page cannot be fetched.
    """
    url = "https://stock.tuchong.com/search?term=" + parse.quote(text) + "&use=0"
    req = requests.get(url, headers=header, timeout=10)
    soup = BeautifulSoup(req.text, 'lxml')

    # Look through every <script> element rather than relying on a fixed
    # position, which breaks as soon as the page layout changes.
    hits = []
    for script in soup.select('script'):
        hits = _parse_hits(str(script))
        if hits:
            break
    if not hits:
        print('未找到搜索结果: ' + text)
        return

    folder = 'img/' + text
    try:
        mkdir(folder)  # create the target directory once, not per image
    except OSError as e:
        print(e)
        return

    for index, hit in enumerate(hits, start=1):
        try:
            imgname = folder + '/' + _safe_filename(hit['title']) + str(index)
            downloadimage(hit['imageId'], imgname)
        except Exception as e:
            # Deliberately best-effort: report the problem with this hit and
            # carry on with the next one.
            print(e)
# Script entry point: ask for a search keyword, then fetch its images.
if __name__ == '__main__':
    keyword = input('输入关键词:')
    getText(text=keyword)
0 commit comments