# author:jiushi
# time:2019/7/11
# file:main.py
4+
5+
6+ from gevent import monkey ;monkey .patch_all ()
7+ from urllib .request import quote
8+ from multiprocessing import Process
9+ import sys
10+ import itertools
11+ import time
12+ import gevent
13+ import requests
14+ import json
15+ import config .config
16+ import re
17+
class Request:
    """Scrape Baidu result pages and report basic facts about each hit.

    For every search-result page the ``data-tools`` attributes are parsed
    as JSON, each linked site is fetched, and one summary line (final URL,
    title, ``Server`` and ``x-powered-by`` headers) is printed and appended
    to ``save.txt``.  Pages are fetched concurrently with gevent greenlets
    inside worker processes started by :meth:`djc`.
    """

    # One single-quoted ``data-tools`` attribute.  The quantifier is lazy so
    # that several attributes on the same line are matched separately rather
    # than being swallowed into one (unparsable) match.
    _DATA_TOOLS_RE = re.compile(r"data-tools='(.*?\})'")

    SEARCH_TIMEOUT = 10   # seconds allowed for a Baidu result page
    RESULT_TIMEOUT = 3    # seconds allowed for each linked site
    BATCH_SIZE = 10       # result-page URLs handed to one worker process
    BANNER_TICKS = 30     # spinner frames drawn by banner()

    def __init__(self):
        self.headers = {
            'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/73.0.3683.103 Safari/537.36'
        }
        self.djcs = []  # result-page URLs queued for the next worker process
        self.guol = []  # summary lines collected but not yet echoed
        self.guanjianzhi = config.config.GUANJIANZI  # keyword filter; '' = off
        self.xcs = []   # greenlets spawned by xc()
        self.url = []   # every summary line seen so far (de-duplication)
        self.calc = 0   # URLs queued since the last dispatch
        self.kq = 0     # spinner frames drawn so far

    def banner(self):
        """Draw a short start-up spinner on stdout.

        One frame is drawn every 0.1 s until ``self.kq`` reaches
        ``BANNER_TICKS``.  After each frame the cursor is moved back to the
        start of the text with backspaces so the next frame overwrites it.
        """
        write, flush = sys.stdout.write, sys.stdout.flush
        frames = itertools.cycle('|/- \\ ')
        # ``<`` rather than ``== 30`` so a counter that is already past the
        # limit cannot spin forever.
        while self.kq < self.BANNER_TICKS:
            data = 'Start Baidu Search:' + next(frames)
            write(data)
            flush()
            time.sleep(.1)
            # Backspace only: writing a space after each backspace would move
            # the cursor forward again and the text would never be rewound.
            write('\x08' * len(data))
            self.kq += 1
        flush()

    @staticmethod
    def _extract_results(html):
        """Return the JSON objects found in ``data-tools='...'`` attributes.

        Attributes whose payload is not valid JSON, or is not a JSON object,
        are skipped.
        """
        results = []
        for payload in Request._DATA_TOOLS_RE.findall(html):
            try:
                parsed = json.loads(payload)
            except ValueError:  # json.JSONDecodeError is a ValueError
                continue
            if isinstance(parsed, dict):
                results.append(parsed)
        return results

    def baidu_search(self, url):
        """Fetch one result page and record a summary line for each hit.

        :param url: URL of a Baidu search-result page.

        New summary lines are appended to ``self.guol`` (pending output) and
        ``self.url`` (de-duplication).  Hits that cannot be parsed or fetched
        are skipped; a failure to fetch the result page itself is reported on
        stderr and the page is abandoned.
        """
        try:
            rqt = requests.get(url=url, headers=self.headers,
                               timeout=self.SEARCH_TIMEOUT)
        except requests.RequestException as exc:
            print('search request failed: {} ({})'.format(url, exc),
                  file=sys.stderr)
            return

        for data in self._extract_results(rqt.text):
            try:
                # Look both keys up first so an incomplete entry does not
                # cost a network round trip.
                target, title = data['url'], data['title']
                urls = requests.get(url=target, headers=self.headers,
                                    timeout=self.RESULT_TIMEOUT)
            except (KeyError, requests.RequestException):
                # Best effort: an unusable or unreachable hit is ignored.
                continue

            headers = urls.headers
            server = headers.get('Server', '')
            power = headers.get('x-powered-by', '')
            line = 'url:{} title:{} server:{} x-power-by:{}'.format(
                urls.url, title, server, power)
            if line in self.url:
                continue
            self.guol.append(line)
            self.url.append(line)

    def echo(self):
        """Print the pending summary lines and append them to ``save.txt``.

        When ``self.guanjianzhi`` is non-empty only lines containing it are
        emitted; otherwise every line is.  ``self.guol`` is emptied afterwards.
        """
        keyword = self.guanjianzhi
        matched = [c for c in self.guol if not keyword or keyword in str(c)]
        if matched:
            # Open the file once and close it deterministically instead of
            # leaking one handle per printed line.
            with open('save.txt', 'a', encoding='utf-8') as out:
                for line in matched:
                    print(line)
                    print(line, file=out)
        self.guol.clear()

    def xc(self, rw):
        """Fetch every URL in ``rw`` concurrently, then echo the results.

        :param rw: iterable of result-page URLs; one greenlet running
            :meth:`baidu_search` is spawned per URL.
        """
        self.xcs.extend(gevent.spawn(self.baidu_search, r) for r in rw)
        gevent.joinall(self.xcs)
        self.echo()

    def _dispatch(self):
        """Start a worker process for the queued URLs and reset the queue."""
        # Hand over a snapshot so the worker's batch is independent of the
        # clear() below.
        p = Process(target=self.xc, args=(list(self.djcs),))
        p.start()
        self.calc = 0
        self.djcs.clear()

    def djc(self):
        """Build the result-page URLs and fan them out to worker processes.

        ``config.config.PAGE`` pages are generated for the search term
        ``config.config.SEARCH`` (``pn`` advances by 10 per page).  Every
        ``BATCH_SIZE`` URLs are handed to a new process running :meth:`xc`;
        a final partial batch gets its own process.  The processes are
        started here but not joined.
        """
        for j in range(config.config.PAGE):
            if self.calc == self.BATCH_SIZE:
                self._dispatch()
            url = 'https://www.baidu.com/s?wd={}&pn={}&oq=1'.format(
                quote(config.config.SEARCH), j * 10)
            self.djcs.append(url)
            self.calc += 1
        if self.djcs:
            # Also resets the queue so a repeated call does not resend it.
            self._dispatch()
102+
if __name__ == '__main__':
    # Script entry point: show the start-up spinner, then launch the search.
    searcher = Request()
    searcher.banner()
    searcher.djc()
# 0 commit comments