-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path2.py
More file actions
32 lines (27 loc) · 1.31 KB
/
2.py
File metadata and controls
32 lines (27 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from bs4 import BeautifulSoup
# Scrape a locally saved article-list page and print the title and categories
# of every article whose rating is above MIN_RATE.

# Raw string: the original literal contained "\D", which is not a valid escape
# sequence and triggers a warning on modern Python. The raw form denotes the
# exact same path.
HTML_PATH = r'C:/Users/asus\Desktop/new_index.html'

# Articles must have a rating strictly greater than this to be printed.
MIN_RATE = 3

with open(HTML_PATH, 'r') as wb_data:
    # BeautifulSoup consumes the file while it is still open.
    Soup = BeautifulSoup(wb_data, 'lxml')

# One result list per field; entries are paired up positionally below.
images = Soup.select('body > div.main-content > ul > li > img')
titles = Soup.select('body > div.main-content > ul > li > div.article-info > h3 > a')
descs = Soup.select('body > div.main-content > ul > li > div.article-info > p.description')
rates = Soup.select('body > div.main-content > ul > li > div.rate > span')
# NOTE(review): this matches every <span> under p.meta-info. If an article has
# more than one such span, zip() below will pair categories with the wrong
# article -- confirm against the page; selecting 'p.meta-info' itself and
# relying on stripped_strings may be what is intended.
cates = Soup.select('body > div.main-content > ul > li > div.article-info > p.meta-info > span')

# Collect one record per article. The original overwrote a single dict on each
# iteration and then iterated over that dict's keys, which raised TypeError on
# i['rate']; the records have to be kept in a list to be filtered afterwards.
# zip() stops at the shortest input list.
info = [
    {
        'title': title.get_text(),
        'rate': rate.get_text(),
        'desc': desc.get_text(),
        'cate': list(cate.stripped_strings),
        'image': image.get('src'),
    }
    for title, image, desc, rate, cate in zip(titles, images, descs, rates, cates)
]

for item in info:
    # 'rate' is scraped as text, so convert it before comparing.
    if float(item['rate']) > MIN_RATE:
        print(item['title'], item['cate'])
'''
body > div.main-content > ul > li:nth-child(1) > div.article-info > h3 > a
body > div.main-content > ul > li:nth-child(1) > div.article-info > p.meta-info > span:nth-child(2)
body > div.main-content > ul > li:nth-child(1) > div.article-info > p.description
body > div.main-content > ul > li:nth-child(1) > div.rate > span
body > div.main-content > ul > li:nth-child(1) > img
'''