# -*- coding: utf-8 -*-
# @Time : 2018/5/5 19:08
# @Author : Ape Code
# @FileName: qiushibaike.py
# @Software: PyCharm
# @Blog :https://www.liuyangxiong.cn
7+
8+ import requests
9+ from bs4 import BeautifulSoup
10+
11+
class Qiushibaike:
    """Minimal scraper that collects article links from www.qiushibaike.com.

    Attributes (all set in ``__init__``):
        headers: HTTP headers sent with every request (a desktop Chrome
            User-Agent string).
        url: Base URL that section paths are appended to.
        articleList: Article hrefs accumulated across ``returnUrl`` calls.
        timeout: Seconds to wait for the server before giving up.
    """

    def __init__(self):
        """Set up the request headers, base URL and the empty result list."""
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 6.1) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/66.0.3359.139 Safari/537.36",
        }
        self.url = "https://www.qiushibaike.com"
        self.articleList = []  # hrefs of the collected articles
        # Without a timeout requests.get() may block forever on a stalled
        # connection.
        self.timeout = 10

    def returnUrl(self, content):
        """Collect the article links found on one listing page.

        Args:
            content: Path appended to ``self.url``. The original author's
                note lists "/", "hot", "imgrank", "text", "history", "pic"
                and "textnew" as sections; ``main`` passes "/8hr/page/N/".

        Returns:
            ``self.articleList`` itself (not a copy), extended with the
            ``href`` of every ``<a class="contentHerf">`` inside the first
            ``<div class="col1">``. Links accumulate across calls.

        Raises:
            Exceptions raised by ``requests.get`` (connection errors,
            timeouts) propagate to the caller.
        """
        response = requests.get(
            self.url + content,
            headers=self.headers,
            timeout=self.timeout,
        )
        soup = BeautifulSoup(response.text, 'lxml')

        container = soup.find('div', class_="col1")
        if container is None:
            # The page has no listing container (layout change, error page,
            # anti-bot response, ...): nothing to add.
            return self.articleList

        # href=True skips anchors that carry the class but no link target,
        # which would otherwise raise KeyError on a['href'].
        anchors = container.find_all('a', class_="contentHerf", href=True)
        self.articleList.extend(anchor['href'] for anchor in anchors)
        return self.articleList

    def getContent(self, url):
        """Fetch the content a user posted (not implemented yet).

        Args:
            url: One of the hrefs returned by ``returnUrl``.

        Returns:
            None; this method is currently a placeholder.
        """
        # TODO: implement article content extraction.
        return None

    def main(self, page=1):
        """Scrape one listing page and print the collected links.

        Per the original author's note this crawls the home page listing by
        default; the other sections are hot, imgrank, text, history, pic
        and textnew.

        Args:
            page: Page number substituted into "/8hr/page/{}/".
        """
        spiderContent = "/8hr/page/{}/".format(page)
        print(self.returnUrl(spiderContent))
41+
42+
if __name__ == '__main__':
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    qiushibaike = Qiushibaike()
    qiushibaike.main()
You can’t perform that action at this time.
0 commit comments