Python-crawler/seleniumDemo.py at master · RaySin8411/Python-crawler

46 lines (36 loc) · 1.3 KB

import re
import time
from urllib.parse import quote_plus

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# Scrape Google search result titles and URLs with a headless Chrome session,
# following the "next page" link up to `next_page_times` times.

# Browser configuration: incognito, headless, with a spoofed Firefox user agent.
chrome_options = Options()
chrome_options.add_argument('--incognito')
chrome_options.add_argument('--headless')  # original author's note: works around a Google bug
chrome_options.add_argument('--disable-gpu')
chrome_options.add_argument('user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0')
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# no longer accepted by recent Selenium releases.
browser = webdriver.Chrome(options=chrome_options)

query = 'selenium'  # search term
next_page_times = 10  # maximum number of result pages to process

# The patterns are applied to BeautifulSoup's prettified HTML, which is why
# they expect a newline plus indentation between a tag and its content.
# Raw strings keep `\w`, `\d` and `\ ` from being treated as (invalid) string
# escapes; compiling once hoists the work out of the page loop.
# NOTE(review): the patterns depend on Google's result markup (class names,
# `<div class="r">`) and may need updating if that markup changes.
title_pattern = re.compile(r'<h3 class="[\w\d]{6} [\w\d]{6}">\n\ +(.+)')
url_pattern = re.compile(r'<div class="r">\ *\n\ *<a href="(.+)" onmousedown')

try:
    # URL-encode the query so spaces and non-ASCII search terms are sent correctly.
    browser.get("https://www.google.com/search?q={}".format(quote_plus(query)))
    for _page in range(next_page_times):
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        content = soup.prettify()
        # Get titles and urls
        titles = title_pattern.findall(content)
        urls = url_pattern.findall(content)
        # zip() stops at the shorter list, so only matched pairs are printed.
        for title, url in zip(titles, urls):
            print(title, url)
        time.sleep(5)
        # Turn to the next page. The link text is "Next page" in Traditional
        # Chinese, so this relies on Google serving that UI language.
        try:
            browser.find_element(By.LINK_TEXT, '下一頁').click()
        except NoSuchElementException:
            # No "next page" link: there are no further results to visit.
            print('Search Early Stopping.')
            break
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # and runs even when an unexpected error interrupts the crawl.
    browser.quit()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

seleniumDemo.py

Latest commit

History

seleniumDemo.py

File metadata and controls